Diffstat (limited to 'lib')
48 files changed, 1545 insertions, 1183 deletions
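Among the changes below, lib/base64.c adds RFC 4648 base64 helpers, base64_encode() and base64_decode(), exported for the rest of the kernel. The following is a minimal caller sketch, not part of this series: the function name, buffer sizing, and error handling are illustrative assumptions layered on top of the signatures shown in the diff (encode returns the non-NUL-terminated output length, decode returns the decoded length or -1 on invalid input).

/*
 * Hypothetical round-trip example (not from this patch set): encode a
 * binary buffer with the new lib/base64.c helpers and decode it back.
 * Encoded size assumes the usual 4 output chars per 3 input bytes,
 * including '=' padding.
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/base64.h>

static int base64_roundtrip_example(const u8 *key, int key_len)
{
	int enc_len, dec_len, ret = 0;
	char *enc = kmalloc(DIV_ROUND_UP(key_len, 3) * 4, GFP_KERNEL);
	u8 *dec = kmalloc(key_len, GFP_KERNEL);

	if (!enc || !dec) {
		ret = -ENOMEM;
		goto out;
	}

	enc_len = base64_encode(key, key_len, enc);	/* not NUL-terminated */
	dec_len = base64_decode(enc, enc_len, dec);	/* -1 if input is invalid */
	if (dec_len != key_len || memcmp(key, dec, key_len))
		ret = -EINVAL;
out:
	kfree(enc);
	kfree(dec);
	return ret;
}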
diff --git a/lib/Kconfig b/lib/Kconfig index eaaad4d85bf2..dc1ab2ed1dc6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -118,6 +118,13 @@ config INDIRECT_IOMEM_FALLBACK  	  mmio accesses when the IO memory address is not a registered  	  emulated region. +config TRACE_MMIO_ACCESS +	bool "Register read/write tracing" +	depends on TRACING && ARCH_HAVE_TRACE_MMIO_ACCESS +	help +	  Create tracepoints for MMIO read/write operations. These trace events +	  can be used for logging all MMIO read/write operations. +  source "lib/crypto/Kconfig"  config LIB_MEMNEQ @@ -685,15 +692,6 @@ config STACKDEPOT_ALWAYS_INIT  	bool  	select STACKDEPOT -config STACK_HASH_ORDER -	int "stack depot hash size (12 => 4KB, 20 => 1024KB)" -	range 12 20 -	default 20 -	depends on STACKDEPOT -	help -	 Select the hash size as a power of 2 for the stackdepot hash table. -	 Choose a lower value to reduce the memory impact. -  config REF_TRACKER  	bool  	depends on STACKTRACE_SUPPORT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2e24db4bff19..072e4b289c13 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -498,7 +498,7 @@ config STACK_VALIDATION  	  runtime stack traces are more reliable.  	  For more information, see -	  tools/objtool/Documentation/stack-validation.txt. +	  tools/objtool/Documentation/objtool.txt.  config NOINSTR_VALIDATION  	bool @@ -699,6 +699,14 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT  	help  	  Debug objects boot parameter default value +config SHRINKER_DEBUG +	bool "Enable shrinker debugging support" +	depends on DEBUG_FS +	help +	  Say Y to enable the shrinker debugfs interface which provides +	  visibility into the kernel memory shrinkers subsystem. +	  Disable it to avoid an extra memory footprint. +  config HAVE_DEBUG_KMEMLEAK  	bool @@ -1560,7 +1568,7 @@ config DEBUG_KOBJECT_RELEASE  	help  	  kobjects are reference counted objects.  This means that their  	  last reference count put is not predictable, and the kobject can -	  live on past the point at which a driver decides to drop it's +	  live on past the point at which a driver decides to drop its  	  initial reference to the kobject gained on allocation.  An  	  example of this would be a struct device which has just been  	  unregistered. @@ -2021,6 +2029,15 @@ config LKDTM  	Documentation on how to use the module can be found in  	Documentation/fault-injection/provoke-crashes.rst +config TEST_CPUMASK +	tristate "cpumask tests" if !KUNIT_ALL_TESTS +	depends on KUNIT +	default KUNIT_ALL_TESTS +	help +	  Enable to turn on cpumask tests, running at boot or module load time. + +	  If unsure, say N. +  config TEST_LIST_SORT  	tristate "Linked list sorting test" if !KUNIT_ALL_TESTS  	depends on KUNIT diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index a9f7eb047768..fd15230a703b 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -84,6 +84,9 @@ config UBSAN_SHIFT  config UBSAN_DIV_ZERO  	bool "Perform checking for integer divide-by-zero"  	depends on $(cc-option,-fsanitize=integer-divide-by-zero) +	# https://github.com/ClangBuiltLinux/linux/issues/1657 +	# https://github.com/llvm/llvm-project/issues/56289 +	depends on !CC_IS_CLANG  	help  	  This option enables -fsanitize=integer-divide-by-zero which checks  	  for integer division by zero. 
This is effectively redundant with the diff --git a/lib/Makefile b/lib/Makefile index f99bf61f8bbc..c95212141928 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,15 +29,14 @@ endif  lib-y := ctype.o string.o vsprintf.o cmdline.o \  	 rbtree.o radix-tree.o timerqueue.o xarray.o \ -	 idr.o extable.o sha1.o irq_regs.o argv_split.o \ +	 idr.o extable.o irq_regs.o argv_split.o \  	 flex_proportions.o ratelimit.o show_mem.o \  	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \  	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ -	 nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \ -	 buildid.o +	 nmi_backtrace.o win_minmax.o memcat_p.o \ +	 buildid.o cpumask.o  lib-$(CONFIG_PRINTK) += dump_stack.o -lib-$(CONFIG_SMP) += cpumask.o  lib-y	+= kobject.o klist.o  obj-y	+= lockref.o @@ -46,7 +45,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \  	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \  	 list_sort.o uuid.o iov_iter.o clz_ctz.o \  	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \ -	 percpu-refcount.o rhashtable.o \ +	 percpu-refcount.o rhashtable.o base64.o \  	 once.o refcount.o usercopy.o errseq.o bucket_locks.o \  	 generic-radix-tree.o  obj-$(CONFIG_STRING_SELFTEST) += test_string.o @@ -100,6 +99,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o  obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o  obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o  obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o +obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o  CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)  obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o  # @@ -151,6 +151,8 @@ lib-y += logic_pio.o  lib-$(CONFIG_INDIRECT_IOMEM) += logic_iomem.o +obj-$(CONFIG_TRACE_MMIO_ACCESS) += trace_readwrite.o +  obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o  obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/base64.c b/lib/base64.c new file mode 100644 index 000000000000..b736a7a431c5 --- /dev/null +++ b/lib/base64.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * base64.c - RFC4648-compliant base64 encoding + * + * Copyright (c) 2020 Hannes Reinecke, SUSE + * + * Based on the base64url routines from fs/crypto/fname.c + * (which are using the URL-safe base64 encoding), + * modified to use the standard coding table from RFC4648 section 4. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/export.h> +#include <linux/string.h> +#include <linux/base64.h> + +static const char base64_table[65] = +	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** + * base64_encode() - base64-encode some binary data + * @src: the binary data to encode + * @srclen: the length of @src in bytes + * @dst: (output) the base64-encoded string.  Not NUL-terminated. + * + * Encodes data using base64 encoding, i.e. the "Base 64 Encoding" specified + * by RFC 4648, including the  '='-padding. + * + * Return: the length of the resulting base64-encoded string in bytes. + */ +int base64_encode(const u8 *src, int srclen, char *dst) +{ +	u32 ac = 0; +	int bits = 0; +	int i; +	char *cp = dst; + +	for (i = 0; i < srclen; i++) { +		ac = (ac << 8) | src[i]; +		bits += 8; +		do { +			bits -= 6; +			*cp++ = base64_table[(ac >> bits) & 0x3f]; +		} while (bits >= 6); +	} +	if (bits) { +		*cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; +		bits -= 6; +	} +	while (bits < 0) { +		*cp++ = '='; +		bits += 2; +	} +	return cp - dst; +} +EXPORT_SYMBOL_GPL(base64_encode); + +/** + * base64_decode() - base64-decode a string + * @src: the string to decode.  
Doesn't need to be NUL-terminated. + * @srclen: the length of @src in bytes + * @dst: (output) the decoded binary data + * + * Decodes a string using base64 encoding, i.e. the "Base 64 Encoding" + * specified by RFC 4648, including the  '='-padding. + * + * This implementation hasn't been optimized for performance. + * + * Return: the length of the resulting decoded binary data in bytes, + *	   or -1 if the string isn't a valid base64 string. + */ +int base64_decode(const char *src, int srclen, u8 *dst) +{ +	u32 ac = 0; +	int bits = 0; +	int i; +	u8 *bp = dst; + +	for (i = 0; i < srclen; i++) { +		const char *p = strchr(base64_table, src[i]); + +		if (src[i] == '=') { +			ac = (ac << 6); +			bits += 6; +			if (bits >= 8) +				bits -= 8; +			continue; +		} +		if (p == NULL || src[i] == 0) +			return -1; +		ac = (ac << 6) | (p - base64_table); +		bits += 6; +		if (bits >= 8) { +			bits -= 8; +			*bp++ = (u8)(ac >> bits); +		} +	} +	if (ac & ((1 << bits) - 1)) +		return -1; +	return bp - dst; +} +EXPORT_SYMBOL_GPL(base64_decode); diff --git a/lib/bitmap.c b/lib/bitmap.c index b18e31ea6e66..488e6c3e5acc 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -237,7 +237,7 @@ void bitmap_cut(unsigned long *dst, const unsigned long *src,  }  EXPORT_SYMBOL(bitmap_cut); -int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,  				const unsigned long *bitmap2, unsigned int bits)  {  	unsigned int k; @@ -275,7 +275,7 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,  }  EXPORT_SYMBOL(__bitmap_xor); -int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,  				const unsigned long *bitmap2, unsigned int bits)  {  	unsigned int k; @@ -333,10 +333,9 @@ bool __bitmap_subset(const unsigned long *bitmap1,  }  EXPORT_SYMBOL(__bitmap_subset); -int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) +unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)  { -	unsigned int k, lim = bits/BITS_PER_LONG; -	int w = 0; +	unsigned int k, lim = bits/BITS_PER_LONG, w = 0;  	for (k = 0; k < lim; k++)  		w += hweight_long(bitmap[k]); @@ -1564,7 +1563,7 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits)  	/* Clear tail bits in the last element of array beyond nbits. 
*/  	if (nbits % 64) -		buf[-1] &= GENMASK_ULL(nbits % 64, 0); +		buf[-1] &= GENMASK_ULL((nbits - 1) % 64, 0);  }  EXPORT_SYMBOL(bitmap_to_arr64);  #endif diff --git a/lib/btree.c b/lib/btree.c index b4cf08a5c267..a82100c73b55 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -238,7 +238,7 @@ static int keyzero(struct btree_geo *geo, unsigned long *key)  	return 1;  } -void *btree_lookup(struct btree_head *head, struct btree_geo *geo, +static void *btree_lookup_node(struct btree_head *head, struct btree_geo *geo,  		unsigned long *key)  {  	int i, height = head->height; @@ -257,7 +257,16 @@ void *btree_lookup(struct btree_head *head, struct btree_geo *geo,  		if (!node)  			return NULL;  	} +	return node; +} +void *btree_lookup(struct btree_head *head, struct btree_geo *geo, +		unsigned long *key) +{ +	int i; +	unsigned long *node; + +	node = btree_lookup_node(head, geo, key);  	if (!node)  		return NULL; @@ -271,23 +280,10 @@ EXPORT_SYMBOL_GPL(btree_lookup);  int btree_update(struct btree_head *head, struct btree_geo *geo,  		 unsigned long *key, void *val)  { -	int i, height = head->height; -	unsigned long *node = head->node; - -	if (height == 0) -		return -ENOENT; - -	for ( ; height > 1; height--) { -		for (i = 0; i < geo->no_pairs; i++) -			if (keycmp(geo, node, i, key) <= 0) -				break; -		if (i == geo->no_pairs) -			return -ENOENT; -		node = bval(geo, node, i); -		if (!node) -			return -ENOENT; -	} +	int i; +	unsigned long *node; +	node = btree_lookup_node(head, geo, key);  	if (!node)  		return -ENOENT; diff --git a/lib/cpumask.c b/lib/cpumask.c index a971a82d2f43..8baeb37e23d3 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -8,61 +8,6 @@  #include <linux/numa.h>  /** - * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) - * @srcp: the cpumask pointer - * - * Returns >= nr_cpu_ids if no further cpus set. - */ -unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ -	/* -1 is a legal arg here. */ -	if (n != -1) -		cpumask_check(n); -	return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); -} -EXPORT_SYMBOL(cpumask_next); - -/** - * cpumask_next_and - get the next cpu in *src1p & *src2p - * @n: the cpu prior to the place to search (ie. return will be > @n) - * @src1p: the first cpumask pointer - * @src2p: the second cpumask pointer - * - * Returns >= nr_cpu_ids if no further cpus set in both. - */ -int cpumask_next_and(int n, const struct cpumask *src1p, -		     const struct cpumask *src2p) -{ -	/* -1 is a legal arg here. */ -	if (n != -1) -		cpumask_check(n); -	return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), -		nr_cpumask_bits, n + 1); -} -EXPORT_SYMBOL(cpumask_next_and); - -/** - * cpumask_any_but - return a "random" in a cpumask, but not this one. - * @mask: the cpumask to search - * @cpu: the cpu to ignore. - * - * Often used to find any cpu but smp_processor_id() in a mask. - * Returns >= nr_cpu_ids if no cpus set. - */ -int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) -{ -	unsigned int i; - -	cpumask_check(cpu); -	for_each_cpu(i, mask) -		if (i != cpu) -			break; -	return i; -} -EXPORT_SYMBOL(cpumask_any_but); - -/**   * cpumask_next_wrap - helper to implement for_each_cpu_wrap   * @n: the cpu prior to the place to search   * @mask: the cpumask pointer @@ -74,9 +19,9 @@ EXPORT_SYMBOL(cpumask_any_but);   * Note: the @wrap argument is required for the start condition when   * we cannot assume @start is set in @mask.   
*/ -int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)  { -	int next; +	unsigned int next;  again:  	next = cpumask_next(n, mask); @@ -125,34 +70,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)  }  EXPORT_SYMBOL(alloc_cpumask_var_node); -bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) -{ -	return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); -} -EXPORT_SYMBOL(zalloc_cpumask_var_node); - -/** - * alloc_cpumask_var - allocate a struct cpumask - * @mask: pointer to cpumask_var_t where the cpumask is returned - * @flags: GFP_ flags - * - * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is - * a nop returning a constant 1 (in <linux/cpumask.h>). - * - * See alloc_cpumask_var_node. - */ -bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) -{ -	return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); -} -EXPORT_SYMBOL(alloc_cpumask_var); - -bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) -{ -	return alloc_cpumask_var(mask, flags | __GFP_ZERO); -} -EXPORT_SYMBOL(zalloc_cpumask_var); -  /**   * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.   * @mask: pointer to cpumask_var_t where the cpumask is returned @@ -192,6 +109,7 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)  }  #endif +#if NR_CPUS > 1  /**   * cpumask_local_spread - select the i'th cpu with local numa cpu's first   * @i: index number @@ -205,7 +123,7 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)   */  unsigned int cpumask_local_spread(unsigned int i, int node)  { -	int cpu; +	unsigned int cpu;  	/* Wrap: we always want a cpu. */  	i %= num_online_cpus(); @@ -243,10 +161,10 @@ static DEFINE_PER_CPU(int, distribute_cpu_mask_prev);   *   * Returns >= nr_cpu_ids if the intersection is empty.   */ -int cpumask_any_and_distribute(const struct cpumask *src1p, +unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,  			       const struct cpumask *src2p)  { -	int next, prev; +	unsigned int next, prev;  	/* NOTE: our first selection will skip 0. */  	prev = __this_cpu_read(distribute_cpu_mask_prev); @@ -262,9 +180,9 @@ int cpumask_any_and_distribute(const struct cpumask *src1p,  }  EXPORT_SYMBOL(cpumask_any_and_distribute); -int cpumask_any_distribute(const struct cpumask *srcp) +unsigned int cpumask_any_distribute(const struct cpumask *srcp)  { -	int next, prev; +	unsigned int next, prev;  	/* NOTE: our first selection will skip 0. 
*/  	prev = __this_cpu_read(distribute_cpu_mask_prev); @@ -279,3 +197,4 @@ int cpumask_any_distribute(const struct cpumask *srcp)  	return next;  }  EXPORT_SYMBOL(cpumask_any_distribute); +#endif /* NR_CPUS */ diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 2082af43d51f..9ff549f63540 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -121,6 +121,9 @@ config CRYPTO_LIB_CHACHA20POLY1305  	select CRYPTO_LIB_POLY1305  	select CRYPTO_ALGAPI +config CRYPTO_LIB_SHA1 +	tristate +  config CRYPTO_LIB_SHA256  	tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 26be2bbe09c5..919cbb2c220d 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -34,6 +34,9 @@ libpoly1305-y					:= poly1305-donna32.o  libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128)	:= poly1305-donna64.o  libpoly1305-y					+= poly1305.o +obj-$(CONFIG_CRYPTO_LIB_SHA1)			+= libsha1.o +libsha1-y					:= sha1.o +  obj-$(CONFIG_CRYPTO_LIB_SHA256)			+= libsha256.o  libsha256-y					:= sha256.o diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 409e4b728770..7d77dea15587 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -4,6 +4,8 @@   */  #include <crypto/internal/blake2s.h> +#include <linux/kernel.h> +#include <linux/random.h>  #include <linux/string.h>  /* @@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)  		}  	} +	for (i = 0; i < 32; ++i) { +		enum { TEST_ALIGNMENT = 16 }; +		u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1] +					__aligned(TEST_ALIGNMENT); +		u8 blocks[BLAKE2S_BLOCK_SIZE * 2]; +		struct blake2s_state state1, state2; + +		get_random_bytes(blocks, sizeof(blocks)); +		get_random_bytes(&state, sizeof(state)); + +#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \ +    defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) +		memcpy(&state1, &state, sizeof(state1)); +		memcpy(&state2, &state, sizeof(state2)); +		blake2s_compress(&state1, blocks, 2, BLAKE2S_BLOCK_SIZE); +		blake2s_compress_generic(&state2, blocks, 2, BLAKE2S_BLOCK_SIZE); +		if (memcmp(&state1, &state2, sizeof(state1))) { +			pr_err("blake2s random compress self-test %d: FAIL\n", +			       i + 1); +			success = false; +		} +#endif + +		memcpy(&state1, &state, sizeof(state1)); +		blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE); +		for (l = 1; l < TEST_ALIGNMENT; ++l) { +			memcpy(unaligned_block + l, blocks, +			       BLAKE2S_BLOCK_SIZE); +			memcpy(&state2, &state, sizeof(state2)); +			blake2s_compress(&state2, unaligned_block + l, 1, +					 BLAKE2S_BLOCK_SIZE); +			if (memcmp(&state1, &state2, sizeof(state1))) { +				pr_err("blake2s random compress align %d self-test %d: FAIL\n", +				       l, i + 1); +				success = false; +			} +		} +	} +  	return success;  } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index c71c09621c09..98e688c6d891 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,16 +16,44 @@  #include <linux/init.h>  #include <linux/bug.h> +static inline void blake2s_set_lastblock(struct blake2s_state *state) +{ +	state->f[0] = -1; +} +  void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)  { -	__blake2s_update(state, in, inlen, false); +	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + +	if (unlikely(!inlen)) +		return; +	if (inlen > fill) { +		memcpy(state->buf + state->buflen, in, fill); +		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); +		state->buflen = 0; +		in += fill; +		inlen -= fill; +	} +	if (inlen > BLAKE2S_BLOCK_SIZE) { +		const size_t nblocks = 
DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); +		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); +		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); +		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); +	} +	memcpy(state->buf + state->buflen, in, inlen); +	state->buflen += inlen;  }  EXPORT_SYMBOL(blake2s_update);  void blake2s_final(struct blake2s_state *state, u8 *out)  {  	WARN_ON(IS_ENABLED(DEBUG) && !out); -	__blake2s_final(state, out, false); +	blake2s_set_lastblock(state); +	memset(state->buf + state->buflen, 0, +	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ +	blake2s_compress(state, state->buf, 1, state->buflen); +	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); +	memcpy(out, state->h, state->outlen);  	memzero_explicit(state, sizeof(*state));  }  EXPORT_SYMBOL(blake2s_final); @@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)  	return 0;  } -static void __exit blake2s_mod_exit(void) -{ -} -  module_init(blake2s_mod_init); -module_exit(blake2s_mod_exit);  MODULE_LICENSE("GPL v2");  MODULE_DESCRIPTION("BLAKE2s hash function");  MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>"); diff --git a/lib/sha1.c b/lib/crypto/sha1.c index 0494766fc574..1aebe7be9401 100644 --- a/lib/sha1.c +++ b/lib/crypto/sha1.c @@ -8,6 +8,7 @@  #include <linux/kernel.h>  #include <linux/export.h> +#include <linux/module.h>  #include <linux/bitops.h>  #include <linux/string.h>  #include <crypto/sha1.h> @@ -135,3 +136,5 @@ void sha1_init(__u32 *buf)  	buf[4] = 0xc3d2e1f0;  }  EXPORT_SYMBOL(sha1_init); + +MODULE_LICENSE("GPL"); diff --git a/lib/devres.c b/lib/devres.c index 14664bbb4875..55eb07e80cbb 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -29,7 +29,8 @@ static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset,  {  	void __iomem **ptr, *addr = NULL; -	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); +	ptr = devres_alloc_node(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL, +				dev_to_node(dev));  	if (!ptr)  		return NULL; @@ -292,7 +293,8 @@ void __iomem *devm_ioport_map(struct device *dev, unsigned long port,  {  	void __iomem **ptr, *addr; -	ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL); +	ptr = devres_alloc_node(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL, +				dev_to_node(dev));  	if (!ptr)  		return NULL; @@ -366,7 +368,8 @@ void __iomem * const *pcim_iomap_table(struct pci_dev *pdev)  	if (dr)  		return dr->table; -	new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL); +	new_dr = devres_alloc_node(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL, +				   dev_to_node(&pdev->dev));  	if (!new_dr)  		return NULL;  	dr = devres_get(&pdev->dev, new_dr, NULL, NULL); @@ -548,7 +551,8 @@ int devm_arch_phys_wc_add(struct device *dev, unsigned long base, unsigned long  	int *mtrr;  	int ret; -	mtrr = devres_alloc(devm_arch_phys_ac_add_release, sizeof(*mtrr), GFP_KERNEL); +	mtrr = devres_alloc_node(devm_arch_phys_ac_add_release, sizeof(*mtrr), GFP_KERNEL, +				 dev_to_node(dev));  	if (!mtrr)  		return -ENOMEM; @@ -593,7 +597,8 @@ int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start,  	struct arch_io_reserve_memtype_wc_devres *dr;  	int ret; -	dr = devres_alloc(devm_arch_io_free_memtype_wc_release, sizeof(*dr), GFP_KERNEL); +	dr = devres_alloc_node(devm_arch_io_free_memtype_wc_release, sizeof(*dr), GFP_KERNEL, +			       dev_to_node(dev));  	if (!dr)  		return -ENOMEM; diff --git a/lib/error-inject.c b/lib/error-inject.c index 2ff5ef689d72..1afca1b1cdea 100644 --- 
a/lib/error-inject.c +++ b/lib/error-inject.c @@ -40,12 +40,18 @@ bool within_error_injection_list(unsigned long addr)  int get_injectable_error_type(unsigned long addr)  {  	struct ei_entry *ent; +	int ei_type = EI_ETYPE_NONE; +	mutex_lock(&ei_mutex);  	list_for_each_entry(ent, &error_injection_list, list) { -		if (addr >= ent->start_addr && addr < ent->end_addr) -			return ent->etype; +		if (addr >= ent->start_addr && addr < ent->end_addr) { +			ei_type = ent->etype; +			break; +		}  	} -	return EI_ETYPE_NONE; +	mutex_unlock(&ei_mutex); + +	return ei_type;  }  /* @@ -197,24 +203,14 @@ static int ei_seq_show(struct seq_file *m, void *v)  	return 0;  } -static const struct seq_operations ei_seq_ops = { +static const struct seq_operations ei_sops = {  	.start = ei_seq_start,  	.next  = ei_seq_next,  	.stop  = ei_seq_stop,  	.show  = ei_seq_show,  }; -static int ei_open(struct inode *inode, struct file *filp) -{ -	return seq_open(filp, &ei_seq_ops); -} - -static const struct file_operations debugfs_ei_ops = { -	.open           = ei_open, -	.read           = seq_read, -	.llseek         = seq_lseek, -	.release        = seq_release, -}; +DEFINE_SEQ_ATTRIBUTE(ei);  static int __init ei_debugfs_init(void)  { @@ -224,7 +220,7 @@ static int __init ei_debugfs_init(void)  	if (!dir)  		return -ENOMEM; -	file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); +	file = debugfs_create_file("list", 0444, dir, NULL, &ei_fops);  	if (!file) {  		debugfs_remove(dir);  		return -ENOMEM; diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index 53e7eb1dd76c..05cccbcf1661 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -63,18 +63,13 @@ void fprop_global_destroy(struct fprop_global *p)   */  bool fprop_new_period(struct fprop_global *p, int periods)  { -	s64 events; -	unsigned long flags; +	s64 events = percpu_counter_sum(&p->events); -	local_irq_save(flags); -	events = percpu_counter_sum(&p->events);  	/*  	 * Don't do anything if there are no events.  	 
*/ -	if (events <= 1) { -		local_irq_restore(flags); +	if (events <= 1)  		return false; -	}  	write_seqcount_begin(&p->sequence);  	if (periods < 64)  		events -= events >> periods; @@ -82,7 +77,6 @@ bool fprop_new_period(struct fprop_global *p, int periods)  	percpu_counter_add(&p->events, -events);  	p->period += periods;  	write_seqcount_end(&p->sequence); -	local_irq_restore(flags);  	return true;  } diff --git a/lib/idr.c b/lib/idr.c index f4ab4f4aa3c7..7ecdfdb5309e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -491,7 +491,8 @@ void ida_free(struct ida *ida, unsigned int id)  	struct ida_bitmap *bitmap;  	unsigned long flags; -	BUG_ON((int)id < 0); +	if ((int)id < 0) +		return;  	xas_lock_irqsave(&xas, flags);  	bitmap = xas_load(&xas); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0b64695ab632..4b7fce72e3e5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -16,6 +16,16 @@  #define PIPE_PARANOIA /* for now */ +/* covers ubuf and kbuf alike */ +#define iterate_buf(i, n, base, len, off, __p, STEP) {		\ +	size_t __maybe_unused off = 0;				\ +	len = n;						\ +	base = __p + i->iov_offset;				\ +	len -= (STEP);						\ +	i->iov_offset += len;					\ +	n = len;						\ +} +  /* covers iovec and kvec alike */  #define iterate_iovec(i, n, base, len, off, __p, STEP) {	\  	size_t off = 0;						\ @@ -110,7 +120,12 @@ __out:								\  	if (unlikely(i->count < n))				\  		n = i->count;					\  	if (likely(n)) {					\ -		if (likely(iter_is_iovec(i))) {			\ +		if (likely(iter_is_ubuf(i))) {			\ +			void __user *base;			\ +			size_t len;				\ +			iterate_buf(i, n, base, len, off,	\ +						i->ubuf, (I)) 	\ +		} else if (likely(iter_is_iovec(i))) {		\  			const struct iovec *iov = i->iov;	\  			void __user *base;			\  			size_t len;				\ @@ -168,172 +183,10 @@ static int copyin(void *to, const void __user *from, size_t n)  	return n;  } -static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, -			 struct iov_iter *i) +static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, +					   unsigned int slot)  { -	size_t skip, copy, left, wanted; -	const struct iovec *iov; -	char __user *buf; -	void *kaddr, *from; - -	if (unlikely(bytes > i->count)) -		bytes = i->count; - -	if (unlikely(!bytes)) -		return 0; - -	might_fault(); -	wanted = bytes; -	iov = i->iov; -	skip = i->iov_offset; -	buf = iov->iov_base + skip; -	copy = min(bytes, iov->iov_len - skip); - -	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) { -		kaddr = kmap_atomic(page); -		from = kaddr + offset; - -		/* first chunk, usually the only one */ -		left = copyout(buf, from, copy); -		copy -= left; -		skip += copy; -		from += copy; -		bytes -= copy; - -		while (unlikely(!left && bytes)) { -			iov++; -			buf = iov->iov_base; -			copy = min(bytes, iov->iov_len); -			left = copyout(buf, from, copy); -			copy -= left; -			skip = copy; -			from += copy; -			bytes -= copy; -		} -		if (likely(!bytes)) { -			kunmap_atomic(kaddr); -			goto done; -		} -		offset = from - kaddr; -		buf += copy; -		kunmap_atomic(kaddr); -		copy = min(bytes, iov->iov_len - skip); -	} -	/* Too bad - revert to non-atomic kmap */ - -	kaddr = kmap(page); -	from = kaddr + offset; -	left = copyout(buf, from, copy); -	copy -= left; -	skip += copy; -	from += copy; -	bytes -= copy; -	while (unlikely(!left && bytes)) { -		iov++; -		buf = iov->iov_base; -		copy = min(bytes, iov->iov_len); -		left = copyout(buf, from, copy); -		copy -= left; -		skip = copy; -		from += copy; -		bytes -= copy; -	} -	kunmap(page); - -done: -	
if (skip == iov->iov_len) { -		iov++; -		skip = 0; -	} -	i->count -= wanted - bytes; -	i->nr_segs -= iov - i->iov; -	i->iov = iov; -	i->iov_offset = skip; -	return wanted - bytes; -} - -static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, -			 struct iov_iter *i) -{ -	size_t skip, copy, left, wanted; -	const struct iovec *iov; -	char __user *buf; -	void *kaddr, *to; - -	if (unlikely(bytes > i->count)) -		bytes = i->count; - -	if (unlikely(!bytes)) -		return 0; - -	might_fault(); -	wanted = bytes; -	iov = i->iov; -	skip = i->iov_offset; -	buf = iov->iov_base + skip; -	copy = min(bytes, iov->iov_len - skip); - -	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) { -		kaddr = kmap_atomic(page); -		to = kaddr + offset; - -		/* first chunk, usually the only one */ -		left = copyin(to, buf, copy); -		copy -= left; -		skip += copy; -		to += copy; -		bytes -= copy; - -		while (unlikely(!left && bytes)) { -			iov++; -			buf = iov->iov_base; -			copy = min(bytes, iov->iov_len); -			left = copyin(to, buf, copy); -			copy -= left; -			skip = copy; -			to += copy; -			bytes -= copy; -		} -		if (likely(!bytes)) { -			kunmap_atomic(kaddr); -			goto done; -		} -		offset = to - kaddr; -		buf += copy; -		kunmap_atomic(kaddr); -		copy = min(bytes, iov->iov_len - skip); -	} -	/* Too bad - revert to non-atomic kmap */ - -	kaddr = kmap(page); -	to = kaddr + offset; -	left = copyin(to, buf, copy); -	copy -= left; -	skip += copy; -	to += copy; -	bytes -= copy; -	while (unlikely(!left && bytes)) { -		iov++; -		buf = iov->iov_base; -		copy = min(bytes, iov->iov_len); -		left = copyin(to, buf, copy); -		copy -= left; -		skip = copy; -		to += copy; -		bytes -= copy; -	} -	kunmap(page); - -done: -	if (skip == iov->iov_len) { -		iov++; -		skip = 0; -	} -	i->count -= wanted - bytes; -	i->nr_segs -= iov - i->iov; -	i->iov = iov; -	i->iov_offset = skip; -	return wanted - bytes; +	return &pipe->bufs[slot & (pipe->ring_size - 1)];  }  #ifdef PIPE_PARANOIA @@ -342,20 +195,19 @@ static bool sanity(const struct iov_iter *i)  	struct pipe_inode_info *pipe = i->pipe;  	unsigned int p_head = pipe->head;  	unsigned int p_tail = pipe->tail; -	unsigned int p_mask = pipe->ring_size - 1;  	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);  	unsigned int i_head = i->head;  	unsigned int idx; -	if (i->iov_offset) { +	if (i->last_offset) {  		struct pipe_buffer *p;  		if (unlikely(p_occupancy == 0))  			goto Bad;	// pipe must be non-empty  		if (unlikely(i_head != p_head - 1))  			goto Bad;	// must be at the last buffer... -		p = &pipe->bufs[i_head & p_mask]; -		if (unlikely(p->offset + p->len != i->iov_offset)) +		p = pipe_buf(pipe, i_head); +		if (unlikely(p->offset + p->len != abs(i->last_offset)))  			goto Bad;	// ... 
at the end of segment  	} else {  		if (i_head != p_head) @@ -363,7 +215,7 @@ static bool sanity(const struct iov_iter *i)  	}  	return true;  Bad: -	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); +	printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset);  	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",  			p_head, p_tail, pipe->ring_size);  	for (idx = 0; idx < pipe->ring_size; idx++) @@ -379,15 +231,79 @@ Bad:  #define sanity(i) true  #endif +static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size) +{ +	struct page *page = alloc_page(GFP_USER); +	if (page) { +		struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); +		*buf = (struct pipe_buffer) { +			.ops = &default_pipe_buf_ops, +			.page = page, +			.offset = 0, +			.len = size +		}; +	} +	return page; +} + +static void push_page(struct pipe_inode_info *pipe, struct page *page, +			unsigned int offset, unsigned int size) +{ +	struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); +	*buf = (struct pipe_buffer) { +		.ops = &page_cache_pipe_buf_ops, +		.page = page, +		.offset = offset, +		.len = size +	}; +	get_page(page); +} + +static inline int last_offset(const struct pipe_buffer *buf) +{ +	if (buf->ops == &default_pipe_buf_ops) +		return buf->len;	// buf->offset is 0 for those +	else +		return -(buf->offset + buf->len); +} + +static struct page *append_pipe(struct iov_iter *i, size_t size, +				unsigned int *off) +{ +	struct pipe_inode_info *pipe = i->pipe; +	int offset = i->last_offset; +	struct pipe_buffer *buf; +	struct page *page; + +	if (offset > 0 && offset < PAGE_SIZE) { +		// some space in the last buffer; add to it +		buf = pipe_buf(pipe, pipe->head - 1); +		size = min_t(size_t, size, PAGE_SIZE - offset); +		buf->len += size; +		i->last_offset += size; +		i->count -= size; +		*off = offset; +		return buf->page; +	} +	// OK, we need a new buffer +	*off = 0; +	size = min_t(size_t, size, PAGE_SIZE); +	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) +		return NULL; +	page = push_anon(pipe, size); +	if (!page) +		return NULL; +	i->head = pipe->head - 1; +	i->last_offset = size; +	i->count -= size; +	return page; +} +  static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,  			 struct iov_iter *i)  {  	struct pipe_inode_info *pipe = i->pipe; -	struct pipe_buffer *buf; -	unsigned int p_tail = pipe->tail; -	unsigned int p_mask = pipe->ring_size - 1; -	unsigned int i_head = i->head; -	size_t off; +	unsigned int head = pipe->head;  	if (unlikely(bytes > i->count))  		bytes = i->count; @@ -398,32 +314,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by  	if (!sanity(i))  		return 0; -	off = i->iov_offset; -	buf = &pipe->bufs[i_head & p_mask]; -	if (off) { -		if (offset == off && buf->page == page) { -			/* merge with the last one */ +	if (offset && i->last_offset == -offset) { // could we merge it? 
+		struct pipe_buffer *buf = pipe_buf(pipe, head - 1); +		if (buf->page == page) {  			buf->len += bytes; -			i->iov_offset += bytes; -			goto out; +			i->last_offset -= bytes; +			i->count -= bytes; +			return bytes;  		} -		i_head++; -		buf = &pipe->bufs[i_head & p_mask];  	} -	if (pipe_full(i_head, p_tail, pipe->max_usage)) +	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))  		return 0; -	buf->ops = &page_cache_pipe_buf_ops; -	buf->flags = 0; -	get_page(page); -	buf->page = page; -	buf->offset = offset; -	buf->len = bytes; - -	pipe->head = i_head + 1; -	i->iov_offset = offset + bytes; -	i->head = i_head; -out: +	push_page(pipe, page, offset, bytes); +	i->last_offset = -(offset + bytes); +	i->head = head;  	i->count -= bytes;  	return bytes;  } @@ -443,7 +348,11 @@ out:   */  size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)  { -	if (iter_is_iovec(i)) { +	if (iter_is_ubuf(i)) { +		size_t n = min(size, iov_iter_count(i)); +		n -= fault_in_readable(i->ubuf + i->iov_offset, n); +		return size - n; +	} else if (iter_is_iovec(i)) {  		size_t count = min(size, iov_iter_count(i));  		const struct iovec *p;  		size_t skip; @@ -482,7 +391,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);   */  size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)  { -	if (iter_is_iovec(i)) { +	if (iter_is_ubuf(i)) { +		size_t n = min(size, iov_iter_count(i)); +		n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); +		return size - n; +	} else if (iter_is_iovec(i)) {  		size_t count = min(size, iov_iter_count(i));  		const struct iovec *p;  		size_t skip; @@ -513,6 +426,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,  	*i = (struct iov_iter) {  		.iter_type = ITER_IOVEC,  		.nofault = false, +		.user_backed = true,  		.data_source = direction,  		.iov = iov,  		.nr_segs = nr_segs, @@ -522,101 +436,43 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,  }  EXPORT_SYMBOL(iov_iter_init); -static inline bool allocated(struct pipe_buffer *buf) -{ -	return buf->ops == &default_pipe_buf_ops; -} - -static inline void data_start(const struct iov_iter *i, -			      unsigned int *iter_headp, size_t *offp) -{ -	unsigned int p_mask = i->pipe->ring_size - 1; -	unsigned int iter_head = i->head; -	size_t off = i->iov_offset; - -	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || -		    off == PAGE_SIZE)) { -		iter_head++; -		off = 0; -	} -	*iter_headp = iter_head; -	*offp = off; -} - -static size_t push_pipe(struct iov_iter *i, size_t size, -			int *iter_headp, size_t *offp) +// returns the offset in partial buffer (if any) +static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages)  {  	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_tail = pipe->tail; -	unsigned int p_mask = pipe->ring_size - 1; -	unsigned int iter_head; -	size_t off; -	ssize_t left; +	int used = pipe->head - pipe->tail; +	int off = i->last_offset; -	if (unlikely(size > i->count)) -		size = i->count; -	if (unlikely(!size)) -		return 0; +	*npages = max((int)pipe->max_usage - used, 0); -	left = size; -	data_start(i, &iter_head, &off); -	*iter_headp = iter_head; -	*offp = off; -	if (off) { -		left -= PAGE_SIZE - off; -		if (left <= 0) { -			pipe->bufs[iter_head & p_mask].len += size; -			return size; -		} -		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; -		iter_head++; -	} -	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { -		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; -		struct page *page = alloc_page(GFP_USER); -	
	if (!page) -			break; - -		buf->ops = &default_pipe_buf_ops; -		buf->flags = 0; -		buf->page = page; -		buf->offset = 0; -		buf->len = min_t(ssize_t, left, PAGE_SIZE); -		left -= buf->len; -		iter_head++; -		pipe->head = iter_head; - -		if (left == 0) -			return size; +	if (off > 0 && off < PAGE_SIZE) { // anon and not full +		(*npages)++; +		return off;  	} -	return size - left; +	return 0;  }  static size_t copy_pipe_to_iter(const void *addr, size_t bytes,  				struct iov_iter *i)  { -	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_mask = pipe->ring_size - 1; -	unsigned int i_head; -	size_t n, off; +	unsigned int off, chunk; -	if (!sanity(i)) +	if (unlikely(bytes > i->count)) +		bytes = i->count; +	if (unlikely(!bytes))  		return 0; -	bytes = n = push_pipe(i, bytes, &i_head, &off); -	if (unlikely(!n)) +	if (!sanity(i))  		return 0; -	do { -		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); -		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); -		i->head = i_head; -		i->iov_offset = off + chunk; -		n -= chunk; + +	for (size_t n = bytes; n; n -= chunk) { +		struct page *page = append_pipe(i, n, &off); +		chunk = min_t(size_t, n, PAGE_SIZE - off); +		if (!page) +			return bytes - n; +		memcpy_to_page(page, off, addr, chunk);  		addr += chunk; -		off = 0; -		i_head++; -	} while (n); -	i->count -= bytes; +	}  	return bytes;  } @@ -630,31 +486,32 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len,  static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,  					 struct iov_iter *i, __wsum *sump)  { -	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_mask = pipe->ring_size - 1;  	__wsum sum = *sump;  	size_t off = 0; -	unsigned int i_head; -	size_t r; +	unsigned int chunk, r; + +	if (unlikely(bytes > i->count)) +		bytes = i->count; +	if (unlikely(!bytes)) +		return 0;  	if (!sanity(i))  		return 0; -	bytes = push_pipe(i, bytes, &i_head, &r);  	while (bytes) { -		size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); -		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); +		struct page *page = append_pipe(i, bytes, &r); +		char *p; + +		if (!page) +			break; +		chunk = min_t(size_t, bytes, PAGE_SIZE - r); +		p = kmap_local_page(page);  		sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off);  		kunmap_local(p); -		i->head = i_head; -		i->iov_offset = r + chunk; -		bytes -= chunk;  		off += chunk; -		r = 0; -		i_head++; +		bytes -= chunk;  	}  	*sump = sum; -	i->count -= off;  	return off;  } @@ -662,7 +519,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)  {  	if (unlikely(iov_iter_is_pipe(i)))  		return copy_pipe_to_iter(addr, bytes, i); -	if (iter_is_iovec(i)) +	if (user_backed_iter(i))  		might_fault();  	iterate_and_advance(i, bytes, base, len, off,  		copyout(base, addr + off, len), @@ -686,32 +543,36 @@ static int copyout_mc(void __user *to, const void *from, size_t n)  static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,  				struct iov_iter *i)  { -	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_mask = pipe->ring_size - 1; -	unsigned int i_head; -	size_t n, off, xfer = 0; +	size_t xfer = 0; +	unsigned int off, chunk; + +	if (unlikely(bytes > i->count)) +		bytes = i->count; +	if (unlikely(!bytes)) +		return 0;  	if (!sanity(i))  		return 0; -	n = push_pipe(i, bytes, &i_head, &off); -	while (n) { -		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); -		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); +	while (bytes) { +		struct page *page = 
append_pipe(i, bytes, &off);  		unsigned long rem; +		char *p; + +		if (!page) +			break; +		chunk = min_t(size_t, bytes, PAGE_SIZE - off); +		p = kmap_local_page(page);  		rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);  		chunk -= rem;  		kunmap_local(p); -		i->head = i_head; -		i->iov_offset = off + chunk;  		xfer += chunk; -		if (rem) +		bytes -= chunk; +		if (rem) { +			iov_iter_revert(i, rem);  			break; -		n -= chunk; -		off = 0; -		i_head++; +		}  	} -	i->count -= xfer;  	return xfer;  } @@ -744,7 +605,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)  {  	if (unlikely(iov_iter_is_pipe(i)))  		return copy_mc_pipe_to_iter(addr, bytes, i); -	if (iter_is_iovec(i)) +	if (user_backed_iter(i))  		might_fault();  	__iterate_and_advance(i, bytes, base, len, off,  		copyout_mc(base, addr + off, len), @@ -762,7 +623,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)  		WARN_ON(1);  		return 0;  	} -	if (iter_is_iovec(i)) +	if (user_backed_iter(i))  		might_fault();  	iterate_and_advance(i, bytes, base, len, off,  		copyin(addr + off, base, len), @@ -845,40 +706,21 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)  	return false;  } -static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, -			 struct iov_iter *i) -{ -	if (likely(iter_is_iovec(i))) -		return copy_page_to_iter_iovec(page, offset, bytes, i); -	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { -		void *kaddr = kmap_local_page(page); -		size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); -		kunmap_local(kaddr); -		return wanted; -	} -	if (iov_iter_is_pipe(i)) -		return copy_page_to_iter_pipe(page, offset, bytes, i); -	if (unlikely(iov_iter_is_discard(i))) { -		if (unlikely(i->count < bytes)) -			bytes = i->count; -		i->count -= bytes; -		return bytes; -	} -	WARN_ON(1); -	return 0; -} -  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,  			 struct iov_iter *i)  {  	size_t res = 0;  	if (unlikely(!page_copy_sane(page, offset, bytes)))  		return 0; +	if (unlikely(iov_iter_is_pipe(i))) +		return copy_page_to_iter_pipe(page, offset, bytes, i);  	page += offset / PAGE_SIZE; // first subpage  	offset %= PAGE_SIZE;  	while (1) { -		size_t n = __copy_page_to_iter(page, offset, -				min(bytes, (size_t)PAGE_SIZE - offset), i); +		void *kaddr = kmap_local_page(page); +		size_t n = min(bytes, (size_t)PAGE_SIZE - offset); +		n = _copy_to_iter(kaddr + offset, n, i); +		kunmap_local(kaddr);  		res += n;  		bytes -= n;  		if (!bytes || !n) @@ -896,47 +738,53 @@ EXPORT_SYMBOL(copy_page_to_iter);  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,  			 struct iov_iter *i)  { -	if (unlikely(!page_copy_sane(page, offset, bytes))) +	size_t res = 0; +	if (!page_copy_sane(page, offset, bytes))  		return 0; -	if (likely(iter_is_iovec(i))) -		return copy_page_from_iter_iovec(page, offset, bytes, i); -	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { +	page += offset / PAGE_SIZE; // first subpage +	offset %= PAGE_SIZE; +	while (1) {  		void *kaddr = kmap_local_page(page); -		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); +		size_t n = min(bytes, (size_t)PAGE_SIZE - offset); +		n = _copy_from_iter(kaddr + offset, n, i);  		kunmap_local(kaddr); -		return wanted; +		res += n; +		bytes -= n; +		if (!bytes || !n) +			break; +		offset += n; +		if (offset == PAGE_SIZE) { +			page++; +			offset = 0; +		}  	} -	WARN_ON(1); -	return 0; +	
return res;  }  EXPORT_SYMBOL(copy_page_from_iter);  static size_t pipe_zero(size_t bytes, struct iov_iter *i)  { -	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_mask = pipe->ring_size - 1; -	unsigned int i_head; -	size_t n, off; +	unsigned int chunk, off; -	if (!sanity(i)) +	if (unlikely(bytes > i->count)) +		bytes = i->count; +	if (unlikely(!bytes))  		return 0; -	bytes = n = push_pipe(i, bytes, &i_head, &off); -	if (unlikely(!n)) +	if (!sanity(i))  		return 0; -	do { -		size_t chunk = min_t(size_t, n, PAGE_SIZE - off); -		char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); +	for (size_t n = bytes; n; n -= chunk) { +		struct page *page = append_pipe(i, n, &off); +		char *p; + +		if (!page) +			return bytes - n; +		chunk = min_t(size_t, n, PAGE_SIZE - off); +		p = kmap_local_page(page);  		memset(p + off, 0, chunk);  		kunmap_local(p); -		i->head = i_head; -		i->iov_offset = off + chunk; -		n -= chunk; -		off = 0; -		i_head++; -	} while (n); -	i->count -= bytes; +	}  	return bytes;  } @@ -975,71 +823,50 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt  }  EXPORT_SYMBOL(copy_page_from_iter_atomic); -static inline void pipe_truncate(struct iov_iter *i) -{ -	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_tail = pipe->tail; -	unsigned int p_head = pipe->head; -	unsigned int p_mask = pipe->ring_size - 1; - -	if (!pipe_empty(p_head, p_tail)) { -		struct pipe_buffer *buf; -		unsigned int i_head = i->head; -		size_t off = i->iov_offset; - -		if (off) { -			buf = &pipe->bufs[i_head & p_mask]; -			buf->len = off - buf->offset; -			i_head++; -		} -		while (p_head != i_head) { -			p_head--; -			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); -		} - -		pipe->head = p_head; -	} -} -  static void pipe_advance(struct iov_iter *i, size_t size)  {  	struct pipe_inode_info *pipe = i->pipe; -	if (size) { -		struct pipe_buffer *buf; -		unsigned int p_mask = pipe->ring_size - 1; -		unsigned int i_head = i->head; -		size_t off = i->iov_offset, left = size; +	int off = i->last_offset; +	if (!off && !size) { +		pipe_discard_from(pipe, i->start_head); // discard everything +		return; +	} +	i->count -= size; +	while (1) { +		struct pipe_buffer *buf = pipe_buf(pipe, i->head);  		if (off) /* make it relative to the beginning of buffer */ -			left += off - pipe->bufs[i_head & p_mask].offset; -		while (1) { -			buf = &pipe->bufs[i_head & p_mask]; -			if (left <= buf->len) -				break; -			left -= buf->len; -			i_head++; +			size += abs(off) - buf->offset; +		if (size <= buf->len) { +			buf->len = size; +			i->last_offset = last_offset(buf); +			break;  		} -		i->head = i_head; -		i->iov_offset = buf->offset + left; +		size -= buf->len; +		i->head++; +		off = 0;  	} -	i->count -= size; -	/* ... 
and discard everything past that point */ -	pipe_truncate(i); +	pipe_discard_from(pipe, i->head + 1); // discard everything past this one  }  static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)  { -	struct bvec_iter bi; +	const struct bio_vec *bvec, *end; + +	if (!i->count) +		return; +	i->count -= size; -	bi.bi_size = i->count; -	bi.bi_bvec_done = i->iov_offset; -	bi.bi_idx = 0; -	bvec_iter_advance(i->bvec, &bi, size); +	size += i->iov_offset; -	i->bvec += bi.bi_idx; -	i->nr_segs -= bi.bi_idx; -	i->count = bi.bi_size; -	i->iov_offset = bi.bi_bvec_done; +	for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) { +		if (likely(size < bvec->bv_len)) +			break; +		size -= bvec->bv_len; +	} +	i->iov_offset = size; +	i->nr_segs -= bvec - i->bvec; +	i->bvec = bvec;  }  static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) @@ -1065,16 +892,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size)  {  	if (unlikely(i->count < size))  		size = i->count; -	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { +	if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { +		i->iov_offset += size; +		i->count -= size; +	} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {  		/* iovec and kvec have identical layouts */  		iov_iter_iovec_advance(i, size);  	} else if (iov_iter_is_bvec(i)) {  		iov_iter_bvec_advance(i, size);  	} else if (iov_iter_is_pipe(i)) {  		pipe_advance(i, size); -	} else if (unlikely(iov_iter_is_xarray(i))) { -		i->iov_offset += size; -		i->count -= size;  	} else if (iov_iter_is_discard(i)) {  		i->count -= size;  	} @@ -1090,28 +917,22 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)  	i->count += unroll;  	if (unlikely(iov_iter_is_pipe(i))) {  		struct pipe_inode_info *pipe = i->pipe; -		unsigned int p_mask = pipe->ring_size - 1; -		unsigned int i_head = i->head; -		size_t off = i->iov_offset; -		while (1) { -			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; -			size_t n = off - b->offset; -			if (unroll < n) { -				off -= unroll; -				break; -			} -			unroll -= n; -			if (!unroll && i_head == i->start_head) { -				off = 0; -				break; +		unsigned int head = pipe->head; + +		while (head > i->start_head) { +			struct pipe_buffer *b = pipe_buf(pipe, --head); +			if (unroll < b->len) { +				b->len -= unroll; +				i->last_offset = last_offset(b); +				i->head = head; +				return;  			} -			i_head--; -			b = &pipe->bufs[i_head & p_mask]; -			off = b->offset + b->len; +			unroll -= b->len; +			pipe_buf_release(pipe, b); +			pipe->head--;  		} -		i->iov_offset = off; -		i->head = i_head; -		pipe_truncate(i); +		i->last_offset = 0; +		i->head = head;  		return;  	}  	if (unlikely(iov_iter_is_discard(i))) @@ -1121,7 +942,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)  		return;  	}  	unroll -= i->iov_offset; -	if (iov_iter_is_xarray(i)) { +	if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {  		BUG(); /* We should never go beyond the start of the specified  			* range since we might then be straying into pages that  			* aren't pinned. 
@@ -1213,7 +1034,7 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,  		.pipe = pipe,  		.head = pipe->head,  		.start_head = pipe->head, -		.iov_offset = 0, +		.last_offset = 0,  		.count = count  	};  } @@ -1268,6 +1089,105 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)  }  EXPORT_SYMBOL(iov_iter_discard); +static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, +				   unsigned len_mask) +{ +	size_t size = i->count; +	size_t skip = i->iov_offset; +	unsigned k; + +	for (k = 0; k < i->nr_segs; k++, skip = 0) { +		size_t len = i->iov[k].iov_len - skip; + +		if (len > size) +			len = size; +		if (len & len_mask) +			return false; +		if ((unsigned long)(i->iov[k].iov_base + skip) & addr_mask) +			return false; + +		size -= len; +		if (!size) +			break; +	} +	return true; +} + +static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, +				  unsigned len_mask) +{ +	size_t size = i->count; +	unsigned skip = i->iov_offset; +	unsigned k; + +	for (k = 0; k < i->nr_segs; k++, skip = 0) { +		size_t len = i->bvec[k].bv_len - skip; + +		if (len > size) +			len = size; +		if (len & len_mask) +			return false; +		if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask) +			return false; + +		size -= len; +		if (!size) +			break; +	} +	return true; +} + +/** + * iov_iter_is_aligned() - Check if the addresses and lengths of each segments + * 	are aligned to the parameters. + * + * @i: &struct iov_iter to restore + * @addr_mask: bit mask to check against the iov element's addresses + * @len_mask: bit mask to check against the iov element's lengths + * + * Return: false if any addresses or lengths intersect with the provided masks + */ +bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, +			 unsigned len_mask) +{ +	if (likely(iter_is_ubuf(i))) { +		if (i->count & len_mask) +			return false; +		if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) +			return false; +		return true; +	} + +	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) +		return iov_iter_aligned_iovec(i, addr_mask, len_mask); + +	if (iov_iter_is_bvec(i)) +		return iov_iter_aligned_bvec(i, addr_mask, len_mask); + +	if (iov_iter_is_pipe(i)) { +		size_t size = i->count; + +		if (size & len_mask) +			return false; +		if (size && i->last_offset > 0) { +			if (i->last_offset & addr_mask) +				return false; +		} + +		return true; +	} + +	if (iov_iter_is_xarray(i)) { +		if (i->count & len_mask) +			return false; +		if ((i->xarray_start + i->iov_offset) & addr_mask) +			return false; +	} + +	return true; +} +EXPORT_SYMBOL_GPL(iov_iter_is_aligned); +  static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)  {  	unsigned long res = 0; @@ -1312,6 +1232,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)  unsigned long iov_iter_alignment(const struct iov_iter *i)  { +	if (likely(iter_is_ubuf(i))) { +		size_t size = i->count; +		if (size) +			return ((unsigned long)i->ubuf + i->iov_offset) | size; +		return 0; +	} +  	/* iovec and kvec have identical layouts */  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))  		return iov_iter_alignment_iovec(i); @@ -1320,11 +1247,10 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)  		return iov_iter_alignment_bvec(i);  	if (iov_iter_is_pipe(i)) { -		unsigned int p_mask = i->pipe->ring_size - 1;  		size_t size = i->count; -		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) -			return size | i->iov_offset; 
+		if (size && i->last_offset > 0) +			return size | i->last_offset;  		return size;  	} @@ -1342,6 +1268,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)  	size_t size = i->count;  	unsigned k; +	if (iter_is_ubuf(i)) +		return 0; +  	if (WARN_ON(!iter_is_iovec(i)))  		return ~0U; @@ -1360,45 +1289,50 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)  }  EXPORT_SYMBOL(iov_iter_gap_alignment); -static inline ssize_t __pipe_get_pages(struct iov_iter *i, -				size_t maxsize, -				struct page **pages, -				int iter_head, -				size_t *start) +static int want_pages_array(struct page ***res, size_t size, +			    size_t start, unsigned int maxpages)  { -	struct pipe_inode_info *pipe = i->pipe; -	unsigned int p_mask = pipe->ring_size - 1; -	ssize_t n = push_pipe(i, maxsize, &iter_head, start); -	if (!n) -		return -EFAULT; +	unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); -	maxsize = n; -	n += *start; -	while (n > 0) { -		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); -		iter_head++; -		n -= PAGE_SIZE; +	if (count > maxpages) +		count = maxpages; +	WARN_ON(!count);	// caller should've prevented that +	if (!*res) { +		*res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); +		if (!*res) +			return 0;  	} - -	return maxsize; +	return count;  }  static ssize_t pipe_get_pages(struct iov_iter *i, -		   struct page **pages, size_t maxsize, unsigned maxpages, +		   struct page ***pages, size_t maxsize, unsigned maxpages,  		   size_t *start)  { -	unsigned int iter_head, npages; -	size_t capacity; +	unsigned int npages, count, off, chunk; +	struct page **p; +	size_t left;  	if (!sanity(i))  		return -EFAULT; -	data_start(i, &iter_head, start); -	/* Amount of free space: some of this one + all after this one */ -	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); -	capacity = min(npages, maxpages) * PAGE_SIZE - *start; - -	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); +	*start = off = pipe_npages(i, &npages); +	if (!npages) +		return -EFAULT; +	count = want_pages_array(pages, maxsize, off, min(npages, maxpages)); +	if (!count) +		return -ENOMEM; +	p = *pages; +	for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) { +		struct page *page = append_pipe(i, left, &off); +		if (!page) +			break; +		chunk = min_t(size_t, left, PAGE_SIZE - off); +		get_page(*p++ = page); +	} +	if (!npages) +		return -EFAULT; +	return maxsize - left;  }  static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1429,122 +1363,124 @@ static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa  }  static ssize_t iter_xarray_get_pages(struct iov_iter *i, -				     struct page **pages, size_t maxsize, +				     struct page ***pages, size_t maxsize,  				     unsigned maxpages, size_t *_start_offset)  { -	unsigned nr, offset; -	pgoff_t index, count; -	size_t size = maxsize; +	unsigned nr, offset, count; +	pgoff_t index;  	loff_t pos; -	if (!size || !maxpages) -		return 0; -  	pos = i->xarray_start + i->iov_offset;  	index = pos >> PAGE_SHIFT;  	offset = pos & ~PAGE_MASK;  	*_start_offset = offset; -	count = 1; -	if (size > PAGE_SIZE - offset) { -		size -= PAGE_SIZE - offset; -		count += size >> PAGE_SHIFT; -		size &= ~PAGE_MASK; -		if (size) -			count++; -	} - -	if (count > maxpages) -		count = maxpages; - -	nr = iter_xarray_populate_pages(pages, i->xarray, index, count); +	count = want_pages_array(pages, maxsize, offset, maxpages); +	if (!count) +		return 
-ENOMEM; +	nr = iter_xarray_populate_pages(*pages, i->xarray, index, count);  	if (nr == 0)  		return 0; -	return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); +	maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); +	i->iov_offset += maxsize; +	i->count -= maxsize; +	return maxsize;  } -/* must be done on non-empty ITER_IOVEC one */ -static unsigned long first_iovec_segment(const struct iov_iter *i, -					 size_t *size, size_t *start, -					 size_t maxsize, unsigned maxpages) +/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ +static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)  {  	size_t skip;  	long k; +	if (iter_is_ubuf(i)) +		return (unsigned long)i->ubuf + i->iov_offset; +  	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { -		unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;  		size_t len = i->iov[k].iov_len - skip;  		if (unlikely(!len))  			continue; -		if (len > maxsize) -			len = maxsize; -		len += (*start = addr % PAGE_SIZE); -		if (len > maxpages * PAGE_SIZE) -			len = maxpages * PAGE_SIZE; -		*size = len; -		return addr & PAGE_MASK; +		if (*size > len) +			*size = len; +		return (unsigned long)i->iov[k].iov_base + skip;  	}  	BUG(); // if it had been empty, we wouldn't get called  }  /* must be done on non-empty ITER_BVEC one */  static struct page *first_bvec_segment(const struct iov_iter *i, -				       size_t *size, size_t *start, -				       size_t maxsize, unsigned maxpages) +				       size_t *size, size_t *start)  {  	struct page *page;  	size_t skip = i->iov_offset, len;  	len = i->bvec->bv_len - skip; -	if (len > maxsize) -		len = maxsize; +	if (*size > len) +		*size = len;  	skip += i->bvec->bv_offset;  	page = i->bvec->bv_page + skip / PAGE_SIZE; -	len += (*start = skip % PAGE_SIZE); -	if (len > maxpages * PAGE_SIZE) -		len = maxpages * PAGE_SIZE; -	*size = len; +	*start = skip % PAGE_SIZE;  	return page;  } -ssize_t iov_iter_get_pages(struct iov_iter *i, -		   struct page **pages, size_t maxsize, unsigned maxpages, -		   size_t *start) +static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, +		   struct page ***pages, size_t maxsize, +		   unsigned int maxpages, size_t *start)  { -	size_t len; -	int n, res; +	unsigned int n;  	if (maxsize > i->count)  		maxsize = i->count;  	if (!maxsize)  		return 0; +	if (maxsize > MAX_RW_COUNT) +		maxsize = MAX_RW_COUNT; -	if (likely(iter_is_iovec(i))) { +	if (likely(user_backed_iter(i))) {  		unsigned int gup_flags = 0;  		unsigned long addr; +		int res;  		if (iov_iter_rw(i) != WRITE)  			gup_flags |= FOLL_WRITE;  		if (i->nofault)  			gup_flags |= FOLL_NOFAULT; -		addr = first_iovec_segment(i, &len, start, maxsize, maxpages); -		n = DIV_ROUND_UP(len, PAGE_SIZE); -		res = get_user_pages_fast(addr, n, gup_flags, pages); +		addr = first_iovec_segment(i, &maxsize); +		*start = addr % PAGE_SIZE; +		addr &= PAGE_MASK; +		n = want_pages_array(pages, maxsize, *start, maxpages); +		if (!n) +			return -ENOMEM; +		res = get_user_pages_fast(addr, n, gup_flags, *pages);  		if (unlikely(res <= 0))  			return res; -		return (res == n ? 
len : res * PAGE_SIZE) - *start; +		maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start); +		iov_iter_advance(i, maxsize); +		return maxsize;  	}  	if (iov_iter_is_bvec(i)) { +		struct page **p;  		struct page *page; -		page = first_bvec_segment(i, &len, start, maxsize, maxpages); -		n = DIV_ROUND_UP(len, PAGE_SIZE); -		while (n--) -			get_page(*pages++ = page++); -		return len - *start; +		page = first_bvec_segment(i, &maxsize, start); +		n = want_pages_array(pages, maxsize, *start, maxpages); +		if (!n) +			return -ENOMEM; +		p = *pages; +		for (int k = 0; k < n; k++) +			get_page(p[k] = page + k); +		maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); +		i->count -= maxsize; +		i->iov_offset += maxsize; +		if (i->iov_offset == i->bvec->bv_len) { +			i->iov_offset = 0; +			i->bvec++; +			i->nr_segs--; +		} +		return maxsize;  	}  	if (iov_iter_is_pipe(i))  		return pipe_get_pages(i, pages, maxsize, maxpages, start); @@ -1552,137 +1488,35 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,  		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);  	return -EFAULT;  } -EXPORT_SYMBOL(iov_iter_get_pages); - -static struct page **get_pages_array(size_t n) -{ -	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL); -} -static ssize_t pipe_get_pages_alloc(struct iov_iter *i, -		   struct page ***pages, size_t maxsize, +ssize_t iov_iter_get_pages2(struct iov_iter *i, +		   struct page **pages, size_t maxsize, unsigned maxpages,  		   size_t *start)  { -	struct page **p; -	unsigned int iter_head, npages; -	ssize_t n; - -	if (!sanity(i)) -		return -EFAULT; - -	data_start(i, &iter_head, start); -	/* Amount of free space: some of this one + all after this one */ -	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); -	n = npages * PAGE_SIZE - *start; -	if (maxsize > n) -		maxsize = n; -	else -		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); -	p = get_pages_array(npages); -	if (!p) -		return -ENOMEM; -	n = __pipe_get_pages(i, maxsize, p, iter_head, start); -	if (n > 0) -		*pages = p; -	else -		kvfree(p); -	return n; -} - -static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, -					   struct page ***pages, size_t maxsize, -					   size_t *_start_offset) -{ -	struct page **p; -	unsigned nr, offset; -	pgoff_t index, count; -	size_t size = maxsize; -	loff_t pos; - -	if (!size) +	if (!maxpages)  		return 0; +	BUG_ON(!pages); -	pos = i->xarray_start + i->iov_offset; -	index = pos >> PAGE_SHIFT; -	offset = pos & ~PAGE_MASK; -	*_start_offset = offset; - -	count = 1; -	if (size > PAGE_SIZE - offset) { -		size -= PAGE_SIZE - offset; -		count += size >> PAGE_SHIFT; -		size &= ~PAGE_MASK; -		if (size) -			count++; -	} - -	p = get_pages_array(count); -	if (!p) -		return -ENOMEM; -	*pages = p; - -	nr = iter_xarray_populate_pages(p, i->xarray, index, count); -	if (nr == 0) -		return 0; - -	return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); +	return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start);  } +EXPORT_SYMBOL(iov_iter_get_pages2); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,  		   struct page ***pages, size_t maxsize,  		   size_t *start)  { -	struct page **p; -	size_t len; -	int n, res; - -	if (maxsize > i->count) -		maxsize = i->count; -	if (!maxsize) -		return 0; +	ssize_t len; -	if (likely(iter_is_iovec(i))) { -		unsigned int gup_flags = 0; -		unsigned long addr; - -		if (iov_iter_rw(i) != WRITE) -			gup_flags |= FOLL_WRITE; -		if (i->nofault) -			gup_flags |= 
FOLL_NOFAULT; +	*pages = NULL; -		addr = first_iovec_segment(i, &len, start, maxsize, ~0U); -		n = DIV_ROUND_UP(len, PAGE_SIZE); -		p = get_pages_array(n); -		if (!p) -			return -ENOMEM; -		res = get_user_pages_fast(addr, n, gup_flags, p); -		if (unlikely(res <= 0)) { -			kvfree(p); -			*pages = NULL; -			return res; -		} -		*pages = p; -		return (res == n ? len : res * PAGE_SIZE) - *start; +	len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start); +	if (len <= 0) { +		kvfree(*pages); +		*pages = NULL;  	} -	if (iov_iter_is_bvec(i)) { -		struct page *page; - -		page = first_bvec_segment(i, &len, start, maxsize, ~0U); -		n = DIV_ROUND_UP(len, PAGE_SIZE); -		*pages = p = get_pages_array(n); -		if (!p) -			return -ENOMEM; -		while (n--) -			get_page(*p++ = page++); -		return len - *start; -	} -	if (iov_iter_is_pipe(i)) -		return pipe_get_pages_alloc(i, pages, maxsize, start); -	if (iov_iter_is_xarray(i)) -		return iter_xarray_get_pages_alloc(i, pages, maxsize, start); -	return -EFAULT; +	return len;  } -EXPORT_SYMBOL(iov_iter_get_pages_alloc); +EXPORT_SYMBOL(iov_iter_get_pages_alloc2);  size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,  			       struct iov_iter *i) @@ -1795,22 +1629,23 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)  {  	if (unlikely(!i->count))  		return 0; +	if (likely(iter_is_ubuf(i))) { +		unsigned offs = offset_in_page(i->ubuf + i->iov_offset); +		int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE); +		return min(npages, maxpages); +	}  	/* iovec and kvec have identical layouts */  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))  		return iov_npages(i, maxpages);  	if (iov_iter_is_bvec(i))  		return bvec_npages(i, maxpages);  	if (iov_iter_is_pipe(i)) { -		unsigned int iter_head;  		int npages; -		size_t off;  		if (!sanity(i))  			return 0; -		data_start(i, &iter_head, &off); -		/* some of this one + all after this one */ -		npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); +		pipe_npages(i, &npages);  		return min(npages, maxpages);  	}  	if (iov_iter_is_xarray(i)) { @@ -1829,17 +1664,16 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)  		WARN_ON(1);  		return NULL;  	} -	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new))) -		return NULL;  	if (iov_iter_is_bvec(new))  		return new->bvec = kmemdup(new->bvec,  				    new->nr_segs * sizeof(struct bio_vec),  				    flags); -	else +	else if (iov_iter_is_kvec(new) || iter_is_iovec(new))  		/* iovec and kvec have identical layout */  		return new->iov = kmemdup(new->iov,  				   new->nr_segs * sizeof(struct iovec),  				   flags); +	return NULL;  }  EXPORT_SYMBOL(dup_iter); @@ -2033,10 +1867,12 @@ EXPORT_SYMBOL(import_single_range);  void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)  {  	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && -			 !iov_iter_is_kvec(i)) +			 !iov_iter_is_kvec(i) && !iter_is_ubuf(i))  		return;  	i->iov_offset = state->iov_offset;  	i->count = state->count; +	if (iter_is_ubuf(i)) +		return;  	/*  	 * For the *vec iters, nr_segs + iov is constant - if we increment  	 * the vec, then we also decrement the nr_segs count. Hence we don't diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 96f96e42ce06..5e223327196a 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -9,8 +9,8 @@   * These symbols point to the .kunit_test_suites section and are defined in   * include/asm-generic/vmlinux.lds.h, and consequently must be extern.   
*/ -extern struct kunit_suite * const * const __kunit_suites_start[]; -extern struct kunit_suite * const * const __kunit_suites_end[]; +extern struct kunit_suite * const __kunit_suites_start[]; +extern struct kunit_suite * const __kunit_suites_end[];  #if IS_BUILTIN(CONFIG_KUNIT) @@ -55,7 +55,7 @@ static void kunit_parse_filter_glob(struct kunit_test_filter *parsed,  /* Create a copy of suite with only tests that match test_glob. */  static struct kunit_suite * -kunit_filter_tests(struct kunit_suite *const suite, const char *test_glob) +kunit_filter_tests(const struct kunit_suite *const suite, const char *test_glob)  {  	int n = 0;  	struct kunit_case *filtered, *test_case; @@ -69,15 +69,15 @@ kunit_filter_tests(struct kunit_suite *const suite, const char *test_glob)  	if (n == 0)  		return NULL; -	/* Use memcpy to workaround copy->name being const. */ -	copy = kmalloc(sizeof(*copy), GFP_KERNEL); +	copy = kmemdup(suite, sizeof(*copy), GFP_KERNEL);  	if (!copy)  		return ERR_PTR(-ENOMEM); -	memcpy(copy, suite, sizeof(*copy));  	filtered = kcalloc(n + 1, sizeof(*filtered), GFP_KERNEL); -	if (!filtered) +	if (!filtered) { +		kfree(copy);  		return ERR_PTR(-ENOMEM); +	}  	n = 0;  	kunit_suite_for_each_test_case(suite, test_case) { @@ -92,62 +92,18 @@ kunit_filter_tests(struct kunit_suite *const suite, const char *test_glob)  static char *kunit_shutdown;  core_param(kunit_shutdown, kunit_shutdown, charp, 0644); -static struct kunit_suite * const * -kunit_filter_subsuite(struct kunit_suite * const * const subsuite, -		      struct kunit_test_filter *filter) -{ -	int i, n = 0; -	struct kunit_suite **filtered, *filtered_suite; - -	n = 0; -	for (i = 0; subsuite[i]; ++i) { -		if (glob_match(filter->suite_glob, subsuite[i]->name)) -			++n; -	} - -	if (n == 0) -		return NULL; - -	filtered = kmalloc_array(n + 1, sizeof(*filtered), GFP_KERNEL); -	if (!filtered) -		return ERR_PTR(-ENOMEM); - -	n = 0; -	for (i = 0; subsuite[i] != NULL; ++i) { -		if (!glob_match(filter->suite_glob, subsuite[i]->name)) -			continue; -		filtered_suite = kunit_filter_tests(subsuite[i], filter->test_glob); -		if (IS_ERR(filtered_suite)) -			return ERR_CAST(filtered_suite); -		else if (filtered_suite) -			filtered[n++] = filtered_suite; -	} -	filtered[n] = NULL; - -	return filtered; -} - +/* Stores an array of suites, end points one past the end */  struct suite_set { -	struct kunit_suite * const * const *start; -	struct kunit_suite * const * const *end; +	struct kunit_suite * const *start; +	struct kunit_suite * const *end;  }; -static void kunit_free_subsuite(struct kunit_suite * const *subsuite) -{ -	unsigned int i; - -	for (i = 0; subsuite[i]; i++) -		kfree(subsuite[i]); - -	kfree(subsuite); -} -  static void kunit_free_suite_set(struct suite_set suite_set)  { -	struct kunit_suite * const * const *suites; +	struct kunit_suite * const *suites;  	for (suites = suite_set.start; suites < suite_set.end; suites++) -		kunit_free_subsuite(*suites); +		kfree(*suites);  	kfree(suite_set.start);  } @@ -156,7 +112,7 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,  					    int *err)  {  	int i; -	struct kunit_suite * const **copy, * const *filtered_subsuite; +	struct kunit_suite **copy, *filtered_suite;  	struct suite_set filtered;  	struct kunit_test_filter filter; @@ -171,14 +127,19 @@ static struct suite_set kunit_filter_suites(const struct suite_set *suite_set,  	kunit_parse_filter_glob(&filter, filter_glob); -	for (i = 0; i < max; ++i) { -		filtered_subsuite = 
kunit_filter_subsuite(suite_set->start[i], &filter); -		if (IS_ERR(filtered_subsuite)) { -			*err = PTR_ERR(filtered_subsuite); +	for (i = 0; &suite_set->start[i] != suite_set->end; i++) { +		if (!glob_match(filter.suite_glob, suite_set->start[i]->name)) +			continue; + +		filtered_suite = kunit_filter_tests(suite_set->start[i], filter.test_glob); +		if (IS_ERR(filtered_suite)) { +			*err = PTR_ERR(filtered_suite);  			return filtered;  		} -		if (filtered_subsuite) -			*copy++ = filtered_subsuite; +		if (!filtered_suite) +			continue; + +		*copy++ = filtered_suite;  	}  	filtered.end = copy; @@ -201,52 +162,33 @@ static void kunit_handle_shutdown(void)  } -static void kunit_print_tap_header(struct suite_set *suite_set) -{ -	struct kunit_suite * const * const *suites, * const *subsuite; -	int num_of_suites = 0; - -	for (suites = suite_set->start; suites < suite_set->end; suites++) -		for (subsuite = *suites; *subsuite != NULL; subsuite++) -			num_of_suites++; - -	pr_info("TAP version 14\n"); -	pr_info("1..%d\n", num_of_suites); -} -  static void kunit_exec_run_tests(struct suite_set *suite_set)  { -	struct kunit_suite * const * const *suites; +	size_t num_suites = suite_set->end - suite_set->start; -	kunit_print_tap_header(suite_set); +	pr_info("TAP version 14\n"); +	pr_info("1..%zu\n", num_suites); -	for (suites = suite_set->start; suites < suite_set->end; suites++) -		__kunit_test_suites_init(*suites); +	__kunit_test_suites_init(suite_set->start, num_suites);  }  static void kunit_exec_list_tests(struct suite_set *suite_set)  { -	unsigned int i; -	struct kunit_suite * const * const *suites; +	struct kunit_suite * const *suites;  	struct kunit_case *test_case;  	/* Hack: print a tap header so kunit.py can find the start of KUnit output. */  	pr_info("TAP version 14\n");  	for (suites = suite_set->start; suites < suite_set->end; suites++) -		for (i = 0; (*suites)[i] != NULL; i++) { -			kunit_suite_for_each_test_case((*suites)[i], test_case) { -				pr_info("%s.%s\n", (*suites)[i]->name, test_case->name); -			} +		kunit_suite_for_each_test_case((*suites), test_case) { +			pr_info("%s.%s\n", (*suites)->name, test_case->name);  		}  }  int kunit_run_all_tests(void)  { -	struct suite_set suite_set = { -		.start = __kunit_suites_start, -		.end = __kunit_suites_end, -	}; +	struct suite_set suite_set = {__kunit_suites_start, __kunit_suites_end};  	int err = 0;  	if (filter_glob_param) { @@ -264,11 +206,10 @@ int kunit_run_all_tests(void)  	else  		pr_err("kunit executor: unknown action '%s'\n", action_param); -	if (filter_glob_param) { /* a copy was made of each array */ +	if (filter_glob_param) { /* a copy was made of each suite */  		kunit_free_suite_set(suite_set);  	} -  out:  	kunit_handle_shutdown();  	return err; diff --git a/lib/kunit/executor_test.c b/lib/kunit/executor_test.c index eac6ff480273..0cea31c27b23 100644 --- a/lib/kunit/executor_test.c +++ b/lib/kunit/executor_test.c @@ -9,8 +9,6 @@  #include <kunit/test.h>  static void kfree_at_end(struct kunit *test, const void *to_free); -static void free_subsuite_at_end(struct kunit *test, -				 struct kunit_suite *const *to_free);  static struct kunit_suite *alloc_fake_suite(struct kunit *test,  					    const char *suite_name,  					    struct kunit_case *test_cases); @@ -41,126 +39,80 @@ static void parse_filter_test(struct kunit *test)  	kfree(filter.test_glob);  } -static void filter_subsuite_test(struct kunit *test) +static void filter_suites_test(struct kunit *test)  { -	struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; -	struct 
kunit_suite * const *filtered; -	struct kunit_test_filter filter = { -		.suite_glob = "suite2", -		.test_glob = NULL, -	}; +	struct kunit_suite *subsuite[3] = {NULL, NULL}; +	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]}; +	struct suite_set got; +	int err = 0;  	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases);  	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases);  	/* Want: suite1, suite2, NULL -> suite2, NULL */ -	filtered = kunit_filter_subsuite(subsuite, &filter); -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered); -	free_subsuite_at_end(test, filtered); +	got = kunit_filter_suites(&suite_set, "suite2", &err); +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start); +	KUNIT_ASSERT_EQ(test, err, 0); +	kfree_at_end(test, got.start);  	/* Validate we just have suite2 */ -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]); -	KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->name, "suite2"); -	KUNIT_EXPECT_FALSE(test, filtered[1]); +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start[0]); +	KUNIT_EXPECT_STREQ(test, (const char *)got.start[0]->name, "suite2"); + +	/* Contains one element (end is 1 past end) */ +	KUNIT_ASSERT_EQ(test, got.end - got.start, 1);  } -static void filter_subsuite_test_glob_test(struct kunit *test) +static void filter_suites_test_glob_test(struct kunit *test)  { -	struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; -	struct kunit_suite * const *filtered; -	struct kunit_test_filter filter = { -		.suite_glob = "suite2", -		.test_glob = "test2", -	}; +	struct kunit_suite *subsuite[3] = {NULL, NULL}; +	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]}; +	struct suite_set got; +	int err = 0;  	subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases);  	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases);  	/* Want: suite1, suite2, NULL -> suite2 (just test1), NULL */ -	filtered = kunit_filter_subsuite(subsuite, &filter); -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered); -	free_subsuite_at_end(test, filtered); +	got = kunit_filter_suites(&suite_set, "suite2.test2", &err); +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start); +	KUNIT_ASSERT_EQ(test, err, 0); +	kfree_at_end(test, got.start);  	/* Validate we just have suite2 */ -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]); -	KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->name, "suite2"); -	KUNIT_EXPECT_FALSE(test, filtered[1]); +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start[0]); +	KUNIT_EXPECT_STREQ(test, (const char *)got.start[0]->name, "suite2"); +	KUNIT_ASSERT_EQ(test, got.end - got.start, 1);  	/* Now validate we just have test2 */ -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]->test_cases); -	KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->test_cases[0].name, "test2"); -	KUNIT_EXPECT_FALSE(test, filtered[0]->test_cases[1].name); +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, got.start[0]->test_cases); +	KUNIT_EXPECT_STREQ(test, (const char *)got.start[0]->test_cases[0].name, "test2"); +	KUNIT_EXPECT_FALSE(test, got.start[0]->test_cases[1].name);  } -static void filter_subsuite_to_empty_test(struct kunit *test) +static void filter_suites_to_empty_test(struct kunit *test)  { -	struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; -	struct kunit_suite * const *filtered; -	struct kunit_test_filter filter = { -		.suite_glob = "not_found", -		.test_glob = NULL, -	}; +	struct kunit_suite *subsuite[3] = {NULL, NULL}; +	struct suite_set suite_set = {.start = subsuite, .end = &subsuite[2]}; +	struct suite_set got; +	int err = 0;  	subsuite[0] = 
alloc_fake_suite(test, "suite1", dummy_test_cases);  	subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); -	filtered = kunit_filter_subsuite(subsuite, &filter); -	free_subsuite_at_end(test, filtered); /* just in case */ +	got = kunit_filter_suites(&suite_set, "not_found", &err); +	KUNIT_ASSERT_EQ(test, err, 0); +	kfree_at_end(test, got.start); /* just in case */ -	KUNIT_EXPECT_FALSE_MSG(test, filtered, -			       "should be NULL to indicate no match"); -} - -static void kfree_subsuites_at_end(struct kunit *test, struct suite_set *suite_set) -{ -	struct kunit_suite * const * const *suites; - -	kfree_at_end(test, suite_set->start); -	for (suites = suite_set->start; suites < suite_set->end; suites++) -		free_subsuite_at_end(test, *suites); -} - -static void filter_suites_test(struct kunit *test) -{ -	/* Suites per-file are stored as a NULL terminated array */ -	struct kunit_suite *subsuites[2][2] = { -		{NULL, NULL}, -		{NULL, NULL}, -	}; -	/* Match the memory layout of suite_set */ -	struct kunit_suite * const * const suites[2] = { -		subsuites[0], subsuites[1], -	}; - -	const struct suite_set suite_set = { -		.start = suites, -		.end = suites + 2, -	}; -	struct suite_set filtered = {.start = NULL, .end = NULL}; -	int err = 0; - -	/* Emulate two files, each having one suite */ -	subsuites[0][0] = alloc_fake_suite(test, "suite0", dummy_test_cases); -	subsuites[1][0] = alloc_fake_suite(test, "suite1", dummy_test_cases); - -	/* Filter out suite1 */ -	filtered = kunit_filter_suites(&suite_set, "suite0", &err); -	kfree_subsuites_at_end(test, &filtered); /* let us use ASSERTs without leaking */ -	KUNIT_EXPECT_EQ(test, err, 0); -	KUNIT_ASSERT_EQ(test, filtered.end - filtered.start, (ptrdiff_t)1); - -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start); -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start[0]); -	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start[0][0]); -	KUNIT_EXPECT_STREQ(test, (const char *)filtered.start[0][0]->name, "suite0"); +	KUNIT_EXPECT_PTR_EQ_MSG(test, got.start, got.end, +				"should be empty to indicate no match");  }  static struct kunit_case executor_test_cases[] = {  	KUNIT_CASE(parse_filter_test), -	KUNIT_CASE(filter_subsuite_test), -	KUNIT_CASE(filter_subsuite_test_glob_test), -	KUNIT_CASE(filter_subsuite_to_empty_test),  	KUNIT_CASE(filter_suites_test), +	KUNIT_CASE(filter_suites_test_glob_test), +	KUNIT_CASE(filter_suites_to_empty_test),  	{}  }; @@ -190,20 +142,6 @@ static void kfree_at_end(struct kunit *test, const void *to_free)  			     (void *)to_free);  } -static void free_subsuite_res_free(struct kunit_resource *res) -{ -	kunit_free_subsuite(res->data); -} - -static void free_subsuite_at_end(struct kunit *test, -				 struct kunit_suite *const *to_free) -{ -	if (IS_ERR_OR_NULL(to_free)) -		return; -	kunit_alloc_resource(test, NULL, free_subsuite_res_free, -			     GFP_KERNEL, (void *)to_free); -} -  static struct kunit_suite *alloc_fake_suite(struct kunit *test,  					    const char *suite_name,  					    struct kunit_case *test_cases) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index a5053a07409f..b73d5bb5c473 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -10,7 +10,9 @@  #include <kunit/test.h>  #include <kunit/test-bug.h>  #include <linux/kernel.h> +#include <linux/module.h>  #include <linux/moduleparam.h> +#include <linux/panic.h>  #include <linux/sched/debug.h>  #include <linux/sched.h> @@ -501,6 +503,9 @@ int kunit_run_tests(struct kunit_suite *suite)  	struct kunit_result_stats suite_stats = { 0 };  	struct kunit_result_stats 
total_stats = { 0 }; +	/* Taint the kernel so we know we've run tests. */ +	add_taint(TAINT_TEST, LOCKDEP_STILL_OK); +  	if (suite->suite_init) {  		suite->suite_init_err = suite->suite_init(suite);  		if (suite->suite_init_err) { @@ -581,11 +586,11 @@ static void kunit_init_suite(struct kunit_suite *suite)  	suite->suite_init_err = 0;  } -int __kunit_test_suites_init(struct kunit_suite * const * const suites) +int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_suites)  {  	unsigned int i; -	for (i = 0; suites[i] != NULL; i++) { +	for (i = 0; i < num_suites; i++) {  		kunit_init_suite(suites[i]);  		kunit_run_tests(suites[i]);  	} @@ -598,17 +603,54 @@ static void kunit_exit_suite(struct kunit_suite *suite)  	kunit_debugfs_destroy_suite(suite);  } -void __kunit_test_suites_exit(struct kunit_suite **suites) +void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites)  {  	unsigned int i; -	for (i = 0; suites[i] != NULL; i++) +	for (i = 0; i < num_suites; i++)  		kunit_exit_suite(suites[i]);  	kunit_suite_counter = 1;  }  EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); +#ifdef CONFIG_MODULES +static void kunit_module_init(struct module *mod) +{ +	__kunit_test_suites_init(mod->kunit_suites, mod->num_kunit_suites); +} + +static void kunit_module_exit(struct module *mod) +{ +	__kunit_test_suites_exit(mod->kunit_suites, mod->num_kunit_suites); +} + +static int kunit_module_notify(struct notifier_block *nb, unsigned long val, +			       void *data) +{ +	struct module *mod = data; + +	switch (val) { +	case MODULE_STATE_LIVE: +		kunit_module_init(mod); +		break; +	case MODULE_STATE_GOING: +		kunit_module_exit(mod); +		break; +	case MODULE_STATE_COMING: +	case MODULE_STATE_UNFORMED: +		break; +	} + +	return 0; +} + +static struct notifier_block kunit_mod_nb = { +	.notifier_call = kunit_module_notify, +	.priority = 0, +}; +#endif +  struct kunit_kmalloc_array_params {  	size_t n;  	size_t size; @@ -703,13 +745,19 @@ EXPORT_SYMBOL_GPL(kunit_cleanup);  static int __init kunit_init(void)  {  	kunit_debugfs_init(); - +#ifdef CONFIG_MODULES +	return register_module_notifier(&kunit_mod_nb); +#else  	return 0; +#endif  }  late_initcall(kunit_init);  static void __exit kunit_exit(void)  { +#ifdef CONFIG_MODULES +	unregister_module_notifier(&kunit_mod_nb); +#endif  	kunit_debugfs_cleanup();  }  module_exit(kunit_exit); diff --git a/lib/list_debug.c b/lib/list_debug.c index 9daa3fb9d1cd..d98d43f80958 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -20,7 +20,11 @@  bool __list_add_valid(struct list_head *new, struct list_head *prev,  		      struct list_head *next)  { -	if (CHECK_DATA_CORRUPTION(next->prev != prev, +	if (CHECK_DATA_CORRUPTION(prev == NULL, +			"list_add corruption. prev is NULL.\n") || +	    CHECK_DATA_CORRUPTION(next == NULL, +			"list_add corruption. next is NULL.\n") || +	    CHECK_DATA_CORRUPTION(next->prev != prev,  			"list_add corruption. next->prev should be prev (%px), but was %px. 
(next=%px).\n",  			prev, next->prev, next) ||  	    CHECK_DATA_CORRUPTION(prev->next != next, @@ -42,7 +46,11 @@ bool __list_del_entry_valid(struct list_head *entry)  	prev = entry->prev;  	next = entry->next; -	if (CHECK_DATA_CORRUPTION(next == LIST_POISON1, +	if (CHECK_DATA_CORRUPTION(next == NULL, +			"list_del corruption, %px->next is NULL\n", entry) || +	    CHECK_DATA_CORRUPTION(prev == NULL, +			"list_del corruption, %px->prev is NULL\n", entry) || +	    CHECK_DATA_CORRUPTION(next == LIST_POISON1,  			"list_del corruption, %px->next is LIST_POISON1 (%px)\n",  			entry, LIST_POISON1) ||  	    CHECK_DATA_CORRUPTION(prev == LIST_POISON2, diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/lib/livepatch/test_klp_callbacks_busy.c index 7ac845f65be5..133929e0ce8f 100644 --- a/lib/livepatch/test_klp_callbacks_busy.c +++ b/lib/livepatch/test_klp_callbacks_busy.c @@ -16,10 +16,12 @@ MODULE_PARM_DESC(block_transition, "block_transition (default=false)");  static void busymod_work_func(struct work_struct *work);  static DECLARE_WORK(work, busymod_work_func); +static DECLARE_COMPLETION(busymod_work_started);  static void busymod_work_func(struct work_struct *work)  {  	pr_info("%s enter\n", __func__); +	complete(&busymod_work_started);  	while (READ_ONCE(block_transition)) {  		/* @@ -37,6 +39,12 @@ static int test_klp_callbacks_busy_init(void)  	pr_info("%s\n", __func__);  	schedule_work(&work); +	/* +	 * To synchronize kernel messages, hold the init function from +	 * exiting until the work function's entry message has printed. +	 */ +	wait_for_completion(&busymod_work_started); +  	if (!block_transition) {  		/*  		 * Serialize output: print all messages from the work diff --git a/lib/lockref.c b/lib/lockref.c index c6f0b183b937..45e93ece8ba0 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -111,31 +111,6 @@ int lockref_put_not_zero(struct lockref *lockref)  EXPORT_SYMBOL(lockref_put_not_zero);  /** - * lockref_get_or_lock - Increments count unless the count is 0 or dead - * @lockref: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count was zero - * and we got the lock instead. 
- */ -int lockref_get_or_lock(struct lockref *lockref) -{ -	CMPXCHG_LOOP( -		new.count++; -		if (old.count <= 0) -			break; -	, -		return 1; -	); - -	spin_lock(&lockref->lock); -	if (lockref->count <= 0) -		return 0; -	lockref->count++; -	spin_unlock(&lockref->lock); -	return 1; -} -EXPORT_SYMBOL(lockref_get_or_lock); - -/**   * lockref_put_return - Decrement reference count if possible   * @lockref: pointer to lockref structure   * diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 52313acbfa62..dc35464216d3 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -147,8 +147,8 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,  		return lc;  	/* else: could not allocate all elements, give up */ -	for (i--; i; i--) { -		void *p = element[i]; +	while (i) { +		void *p = element[--i];  		kmem_cache_free(cache, p - e_off);  	}  	kfree(lc); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index fd1728d94bab..59fe69a63800 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -507,9 +507,9 @@ static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest,  				      (BYTE *)dest - prefixSize, NULL, 0);  } -int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, -				     int compressedSize, int maxOutputSize, -				     const void *dictStart, size_t dictSize) +static int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, +					    int compressedSize, int maxOutputSize, +					    const void *dictStart, size_t dictSize)  {  	return LZ4_decompress_generic(source, dest,  				      compressedSize, maxOutputSize, diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 76758e9296ba..9d31e7126606 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -50,9 +50,7 @@ next:  		if (dv == 0 && bitstream_version) {  			const unsigned char *ir = ip + 4; -			const unsigned char *limit = ip_end -				< (ip + MAX_ZERO_RUN_LENGTH + 1) -				? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; +			const unsigned char *limit = min(ip_end, ip + MAX_ZERO_RUN_LENGTH + 1);  #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \  	defined(LZO_FAST_64BIT_MEMORY_ACCESS)  			u64 dv64; @@ -326,7 +324,7 @@ static int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,  	data_start = op;  	while (l > 20) { -		size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); +		size_t ll = min_t(size_t, l, m4_max_offset + 1);  		uintptr_t ll_end = (uintptr_t) ip + ll;  		if ((ll_end + ((t + ll) >> 5)) <= ll_end)  			break; diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c index 2cdae54c1bd0..9056fc5167fc 100644 --- a/lib/mpi/mpi-add.c +++ b/lib/mpi/mpi-add.c @@ -138,7 +138,7 @@ void mpi_sub(MPI w, MPI u, MPI v)  	mpi_add(w, u, vv);  	mpi_free(vv);  } - +EXPORT_SYMBOL_GPL(mpi_sub);  void mpi_addm(MPI w, MPI u, MPI v, MPI m)  { diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c index 8f5fa200f297..7f4eda8560dc 100644 --- a/lib/mpi/mpi-mul.c +++ b/lib/mpi/mpi-mul.c @@ -82,6 +82,7 @@ void mpi_mul(MPI w, MPI u, MPI v)  	if (tmp_limb)  		mpi_free_limb_space(tmp_limb);  } +EXPORT_SYMBOL_GPL(mpi_mul);  void mpi_mulm(MPI w, MPI u, MPI v, MPI m)  { diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index bc81419f400c..aa8c46544af8 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -272,7 +272,7 @@ MPI mpi_set_ui(MPI w, unsigned long u)  	if (!w)  		w = mpi_alloc(1);  	/* FIXME: If U is 0 we have no need to resize and thus possible -	 * allocating the the limbs. +	 * allocating the limbs.  	 
*/  	RESIZE_IF_NEEDED(w, 1);  	w->d[0] = u; diff --git a/lib/nodemask.c b/lib/nodemask.c index e22647f5181b..b8a433d16b51 100644 --- a/lib/nodemask.c +++ b/lib/nodemask.c @@ -3,14 +3,6 @@  #include <linux/module.h>  #include <linux/random.h> -unsigned int __next_node_in(int node, const nodemask_t *srcp) -{ -	unsigned int ret = __next_node(node, srcp); - -	if (ret == MAX_NUMNODES) -		ret = __first_node(srcp); -	return ret; -}  EXPORT_SYMBOL(__next_node_in);  #ifdef CONFIG_NUMA diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 475f0c064bf6..7e3e43679b73 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -91,6 +91,7 @@ DEFINE_TEST_ARRAY(u32) = {  	{-4U, 5U, 1U, -9U, -20U, true, false, true},  }; +#if BITS_PER_LONG == 64  DEFINE_TEST_ARRAY(u64) = {  	{0, 0, 0, 0, 0, false, false, false},  	{1, 1, 2, 0, 1, false, false, false}, @@ -114,6 +115,7 @@ DEFINE_TEST_ARRAY(u64) = {  	 false, true, false},  	{-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true},  }; +#endif  DEFINE_TEST_ARRAY(s8) = {  	{0, 0, 0, 0, 0, false, false, false}, @@ -188,6 +190,8 @@ DEFINE_TEST_ARRAY(s32) = {  	{S32_MIN, S32_MIN, 0, 0, 0, true, false, true},  	{S32_MAX, S32_MAX, -2, 0, 1, true, false, true},  }; + +#if BITS_PER_LONG == 64  DEFINE_TEST_ARRAY(s64) = {  	{0, 0, 0, 0, 0, false, false, false}, @@ -216,6 +220,7 @@ DEFINE_TEST_ARRAY(s64) = {  	{-128, -1, -129, -127, 128, false, false, false},  	{0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false},  }; +#endif  #define check_one_op(t, fmt, op, sym, a, b, r, of) do {		\  	t _r;							\ @@ -650,6 +655,7 @@ static struct kunit_case overflow_test_cases[] = {  	KUNIT_CASE(s16_overflow_test),  	KUNIT_CASE(u32_overflow_test),  	KUNIT_CASE(s32_overflow_test), +/* Clang 13 and earlier generate unwanted libcalls on 32-bit. */  #if BITS_PER_LONG == 64  	KUNIT_CASE(u64_overflow_test),  	KUNIT_CASE(s64_overflow_test), diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b3afafe46fff..3c78e1e8b2ad 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -677,7 +677,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)  }  static inline int insert_entries(struct radix_tree_node *node, -		void __rcu **slot, void *item, bool replace) +		void __rcu **slot, void *item)  {  	if (*slot)  		return -EEXIST; @@ -711,7 +711,7 @@ int radix_tree_insert(struct radix_tree_root *root, unsigned long index,  	if (error)  		return error; -	error = insert_entries(node, slot, item, false); +	error = insert_entries(node, slot, item);  	if (error < 0)  		return error; diff --git a/lib/sbitmap.c b/lib/sbitmap.c index ae4fd4de9ebe..29eb0484215a 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -528,7 +528,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,  		sbitmap_deferred_clear(map);  		if (map->word == (1UL << (map_depth - 1)) - 1) -			continue; +			goto next;  		nr = find_first_zero_bit(&map->word, map_depth);  		if (nr + nr_tags <= map_depth) { @@ -539,6 +539,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,  			get_mask = ((1UL << map_tags) - 1) << nr;  			do {  				val = READ_ONCE(map->word); +				if ((val & ~get_mask) != val) +					goto next;  				ret = atomic_long_cmpxchg(ptr, val, get_mask | val);  			} while (ret != val);  			get_mask = (get_mask & ~ret) >> nr; @@ -549,6 +551,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,  				return get_mask;  			}  		} +next:  		/* Jump to next index. 
*/  		if (++index >= sb->map_nr)  			index = 0; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d5e82e4a57ad..c8c3d675845c 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -240,7 +240,7 @@ EXPORT_SYMBOL(__sg_free_table);   **/  void sg_free_append_table(struct sg_append_table *table)  { -	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree, +	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,  			table->total_nents);  }  EXPORT_SYMBOL(sg_free_append_table); @@ -253,7 +253,7 @@ EXPORT_SYMBOL(sg_free_append_table);   **/  void sg_free_table(struct sg_table *table)  { -	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree, +	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,  			table->orig_nents);  }  EXPORT_SYMBOL(sg_free_table); diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 046ac6297c78..a2bb7738c373 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -47,9 +47,9 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)  	printk("caller is %pS\n", __builtin_return_address(0));  	dump_stack(); -	instrumentation_end();  out_enable: +	instrumentation_end();  	preempt_enable_no_resched_notrace();  out:  	return this_cpu; diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 5ca0d086ef4a..e73fda23388d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -32,6 +32,7 @@  #include <linux/string.h>  #include <linux/types.h>  #include <linux/memblock.h> +#include <linux/kasan-enabled.h>  #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) @@ -145,10 +146,16 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)  	return stack;  } -#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER) -#define STACK_HASH_MASK (STACK_HASH_SIZE - 1) +/* one hash table bucket entry per 16kB of memory */ +#define STACK_HASH_SCALE	14 +/* limited between 4k and 1M buckets */ +#define STACK_HASH_ORDER_MIN	12 +#define STACK_HASH_ORDER_MAX	20  #define STACK_HASH_SEED 0x9747b28c +static unsigned int stack_hash_order; +static unsigned int stack_hash_mask; +  static bool stack_depot_disable;  static struct stack_record **stack_table; @@ -175,7 +182,7 @@ void __init stack_depot_want_early_init(void)  int __init stack_depot_early_init(void)  { -	size_t size; +	unsigned long entries = 0;  	/* This is supposed to be called only once, from mm_init() */  	if (WARN_ON(__stack_depot_early_init_passed)) @@ -183,13 +190,23 @@ int __init stack_depot_early_init(void)  	__stack_depot_early_init_passed = true; +	if (kasan_enabled() && !stack_hash_order) +		stack_hash_order = STACK_HASH_ORDER_MAX; +  	if (!__stack_depot_want_early_init || stack_depot_disable)  		return 0; -	size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); -	pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n", -		size); -	stack_table = memblock_alloc(size, SMP_CACHE_BYTES); +	if (stack_hash_order) +		entries = 1UL <<  stack_hash_order; +	stack_table = alloc_large_system_hash("stackdepot", +						sizeof(struct stack_record *), +						entries, +						STACK_HASH_SCALE, +						HASH_EARLY | HASH_ZERO, +						NULL, +						&stack_hash_mask, +						1UL << STACK_HASH_ORDER_MIN, +						1UL << STACK_HASH_ORDER_MAX);  	if (!stack_table) {  		pr_err("Stack Depot hash table allocation failed, disabling\n"); @@ -207,13 +224,35 @@ int stack_depot_init(void)  	mutex_lock(&stack_depot_init_mutex);  	if (!stack_depot_disable && !stack_table) { -		pr_info("Stack Depot allocating hash table with 
kvcalloc\n"); -		stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL); +		unsigned long entries; +		int scale = STACK_HASH_SCALE; + +		if (stack_hash_order) { +			entries = 1UL << stack_hash_order; +		} else { +			entries = nr_free_buffer_pages(); +			entries = roundup_pow_of_two(entries); + +			if (scale > PAGE_SHIFT) +				entries >>= (scale - PAGE_SHIFT); +			else +				entries <<= (PAGE_SHIFT - scale); +		} + +		if (entries < 1UL << STACK_HASH_ORDER_MIN) +			entries = 1UL << STACK_HASH_ORDER_MIN; +		if (entries > 1UL << STACK_HASH_ORDER_MAX) +			entries = 1UL << STACK_HASH_ORDER_MAX; + +		pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n", +				entries); +		stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL);  		if (!stack_table) {  			pr_err("Stack Depot hash table allocation failed, disabling\n");  			stack_depot_disable = true;  			ret = -ENOMEM;  		} +		stack_hash_mask = entries - 1;  	}  	mutex_unlock(&stack_depot_init_mutex);  	return ret; @@ -386,7 +425,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,  		goto fast_exit;  	hash = hash_stack(entries, nr_entries); -	bucket = &stack_table[hash & STACK_HASH_MASK]; +	bucket = &stack_table[hash & stack_hash_mask];  	/*  	 * Fast path: look the stack trace up without locking. diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index d5923a640457..98754ff9fe68 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -604,6 +604,12 @@ static void __init test_bitmap_arr64(void)  			pr_err("bitmap_copy_arr64(nbits == %d:"  				" tail is not safely cleared: %d\n", nbits, next_bit); +		if ((nbits % 64) && +		    (arr[(nbits - 1) / 64] & ~GENMASK_ULL((nbits - 1) % 64, 0))) +			pr_err("bitmap_to_arr64(nbits == %d): tail is not safely cleared: 0x%016llx (must be 0x%016llx)\n", +			       nbits, arr[(nbits - 1) / 64], +			       GENMASK_ULL((nbits - 1) % 64, 0)); +  		if (nbits < EXP1_IN_BITS - 64)  			expect_eq_uint(arr[DIV_ROUND_UP(nbits, 64)], 0xa5a5a5a5);  	} @@ -869,6 +875,67 @@ static void __init test_bitmap_print_buf(void)  	}  } +static void __init test_bitmap_const_eval(void) +{ +	DECLARE_BITMAP(bitmap, BITS_PER_LONG); +	unsigned long initvar = BIT(2); +	unsigned long bitopvar = 0; +	unsigned long var = 0; +	int res; + +	/* +	 * Compilers must be able to optimize all of those to compile-time +	 * constants on any supported optimization level (-O2, -Os) and any +	 * architecture. Otherwise, trigger a build bug. +	 * The whole function gets optimized out then, there's nothing to do +	 * in runtime. +	 */ + +	/* +	 * Equals to `unsigned long bitmap[1] = { GENMASK(6, 5), }`. +	 * Clang on s390 optimizes bitops at compile-time as intended, but at +	 * the same time stops treating @bitmap and @bitopvar as compile-time +	 * constants after regular test_bit() is executed, thus triggering the +	 * build bugs below. So, call const_test_bit() there directly until +	 * the compiler is fixed. 
+	 */ +	bitmap_clear(bitmap, 0, BITS_PER_LONG); +#if defined(__s390__) && defined(__clang__) +	if (!const_test_bit(7, bitmap)) +#else +	if (!test_bit(7, bitmap)) +#endif +		bitmap_set(bitmap, 5, 2); + +	/* Equals to `unsigned long bitopvar = BIT(20)` */ +	__change_bit(31, &bitopvar); +	bitmap_shift_right(&bitopvar, &bitopvar, 11, BITS_PER_LONG); + +	/* Equals to `unsigned long var = BIT(25)` */ +	var |= BIT(25); +	if (var & BIT(0)) +		var ^= GENMASK(9, 6); + +	/* __const_hweight<32|64>(GENMASK(6, 5)) == 2 */ +	res = bitmap_weight(bitmap, 20); +	BUILD_BUG_ON(!__builtin_constant_p(res)); +	BUILD_BUG_ON(res != 2); + +	/* !(BIT(31) & BIT(18)) == 1 */ +	res = !test_bit(18, &bitopvar); +	BUILD_BUG_ON(!__builtin_constant_p(res)); +	BUILD_BUG_ON(!res); + +	/* BIT(2) & GENMASK(14, 8) == 0 */ +	res = initvar & GENMASK(14, 8); +	BUILD_BUG_ON(!__builtin_constant_p(res)); +	BUILD_BUG_ON(res); + +	/* ~BIT(25) */ +	BUILD_BUG_ON(!__builtin_constant_p(~var)); +	BUILD_BUG_ON(~var != ~BIT(25)); +} +  static void __init selftest(void)  {  	test_zero_clear(); @@ -884,6 +951,7 @@ static void __init selftest(void)  	test_for_each_set_clump8();  	test_bitmap_cut();  	test_bitmap_print_buf(); +	test_bitmap_const_eval();  }  KSTM_MODULE_LOADERS(test_bitmap); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2a7836e115b4..5820704165a6 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14733,9 +14733,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = {  		.build_skb = build_test_skb_linear_no_head_frag,  		.features = NETIF_F_SG | NETIF_F_FRAGLIST |  			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO | -			    NETIF_F_LLTX_BIT | NETIF_F_GRO | +			    NETIF_F_LLTX | NETIF_F_GRO |  			    NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | -			    NETIF_F_HW_VLAN_STAG_TX_BIT +			    NETIF_F_HW_VLAN_STAG_TX  	}  }; diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c new file mode 100644 index 000000000000..a31a1622f1f6 --- /dev/null +++ b/lib/test_cpumask.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for cpumask. 
+ * + * Author: Sander Vanheule <sander@svanheule.net> + */ + +#include <kunit/test.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> + +#define EXPECT_FOR_EACH_CPU_EQ(test, mask)			\ +	do {							\ +		const cpumask_t *m = (mask);			\ +		int mask_weight = cpumask_weight(m);		\ +		int cpu, iter = 0;				\ +		for_each_cpu(cpu, m)				\ +			iter++;					\ +		KUNIT_EXPECT_EQ((test), mask_weight, iter);	\ +	} while (0) + +#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask)					\ +	do {									\ +		const cpumask_t *m = (mask);					\ +		int mask_weight = cpumask_weight(m);				\ +		int cpu, iter = 0;						\ +		for_each_cpu_not(cpu, m)					\ +			iter++;							\ +		KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter);	\ +	} while (0) + +#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask)			\ +	do {							\ +		const cpumask_t *m = (mask);			\ +		int mask_weight = cpumask_weight(m);		\ +		int cpu, iter = 0;				\ +		for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2)	\ +			iter++;					\ +		KUNIT_EXPECT_EQ((test), mask_weight, iter);	\ +	} while (0) + +#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name)		\ +	do {							\ +		int mask_weight = num_##name##_cpus();		\ +		int cpu, iter = 0;				\ +		for_each_##name##_cpu(cpu)			\ +			iter++;					\ +		KUNIT_EXPECT_EQ((test), mask_weight, iter);	\ +	} while (0) + +static cpumask_t mask_empty; +static cpumask_t mask_all; + +static void test_cpumask_weight(struct kunit *test) +{ +	KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty)); +	KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask)); +	KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all)); + +	KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty)); +	KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask)); +	KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all)); +} + +static void test_cpumask_first(struct kunit *test) +{ +	KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty)); +	KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask)); + +	KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty)); +	KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask)); +} + +static void test_cpumask_last(struct kunit *test) +{ +	KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty)); +	KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask)); +} + +static void test_cpumask_next(struct kunit *test) +{ +	KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty)); +	KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask)); + +	KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty)); +	KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask)); +} + +static void test_cpumask_iterators(struct kunit *test) +{ +	EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); +	EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); +	EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); + +	EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); +	EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); +	EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); +} + +static void test_cpumask_iterators_builtin(struct kunit *test) +{ +	EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible); + +	/* Ensure the dynamic masks are stable while running the tests */ +	cpu_hotplug_disable(); + +	EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online); +	EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present); + +	cpu_hotplug_enable(); +} + +static int test_cpumask_init(struct kunit *test) +{ +	cpumask_clear(&mask_empty); +	cpumask_setall(&mask_all); + +	return 0; +} + +static struct kunit_case test_cpumask_cases[] = { 
+	KUNIT_CASE(test_cpumask_weight), +	KUNIT_CASE(test_cpumask_first), +	KUNIT_CASE(test_cpumask_last), +	KUNIT_CASE(test_cpumask_next), +	KUNIT_CASE(test_cpumask_iterators), +	KUNIT_CASE(test_cpumask_iterators_builtin), +	{} +}; + +static struct kunit_suite test_cpumask_suite = { +	.name = "cpumask", +	.init = test_cpumask_init, +	.test_cases = test_cpumask_cases, +}; +kunit_test_suite(test_cpumask_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_free_pages.c b/lib/test_free_pages.c index 25ae1ac2624a..9ebf6f5549f3 100644 --- a/lib/test_free_pages.c +++ b/lib/test_free_pages.c @@ -17,7 +17,7 @@ static void test_free_pages(gfp_t gfp)  	for (i = 0; i < 1000 * 1000; i++) {  		unsigned long addr = __get_free_pages(gfp, 3); -		struct page *page = virt_to_page(addr); +		struct page *page = virt_to_page((void *)addr);  		/* Simulate page cache getting a speculative reference */  		get_page(page); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index cfe632047839..e3965cafd27c 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -32,11 +32,32 @@  #include "test_hmm_uapi.h" -#define DMIRROR_NDEVICES		2 +#define DMIRROR_NDEVICES		4  #define DMIRROR_RANGE_FAULT_TIMEOUT	1000  #define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)  #define DEVMEM_CHUNKS_RESERVE		16 +/* + * For device_private pages, dpage is just a dummy struct page + * representing a piece of device memory. dmirror_devmem_alloc_page + * allocates a real system memory page as backing storage to fake a + * real device. zone_device_data points to that backing page. But + * for device_coherent memory, the struct page represents real + * physical CPU-accessible memory that we can use directly. + */ +#define BACKING_PAGE(page) (is_device_private_page((page)) ? \ +			   (page)->zone_device_data : (page)) + +static unsigned long spm_addr_dev0; +module_param(spm_addr_dev0, long, 0644); +MODULE_PARM_DESC(spm_addr_dev0, +		"Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE."); + +static unsigned long spm_addr_dev1; +module_param(spm_addr_dev1, long, 0644); +MODULE_PARM_DESC(spm_addr_dev1, +		"Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE."); +  static const struct dev_pagemap_ops dmirror_devmem_ops;  static const struct mmu_interval_notifier_ops dmirror_min_ops;  static dev_t dmirror_dev; @@ -87,6 +108,7 @@ struct dmirror_chunk {  struct dmirror_device {  	struct cdev		cdevice;  	struct hmm_devmem	*devmem; +	unsigned int            zone_device_type;  	unsigned int		devmem_capacity;  	unsigned int		devmem_count; @@ -114,6 +136,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce,  	return 0;  } +static bool dmirror_is_private_zone(struct dmirror_device *mdevice) +{ +	return (mdevice->zone_device_type == +		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false; +} + +static enum migrate_vma_direction +dmirror_select_device(struct dmirror *dmirror) +{ +	return (dmirror->mdevice->zone_device_type == +		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? 
+		MIGRATE_VMA_SELECT_DEVICE_PRIVATE : +		MIGRATE_VMA_SELECT_DEVICE_COHERENT; +} +  static void dmirror_bounce_fini(struct dmirror_bounce *bounce)  {  	vfree(bounce->ptr); @@ -454,28 +491,44 @@ fini:  	return ret;  } -static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, +static int dmirror_allocate_chunk(struct dmirror_device *mdevice,  				   struct page **ppage)  {  	struct dmirror_chunk *devmem; -	struct resource *res; +	struct resource *res = NULL;  	unsigned long pfn;  	unsigned long pfn_first;  	unsigned long pfn_last;  	void *ptr; +	int ret = -ENOMEM;  	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);  	if (!devmem) -		return false; +		return ret; -	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, -				      "hmm_dmirror"); -	if (IS_ERR(res)) +	switch (mdevice->zone_device_type) { +	case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE: +		res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, +					      "hmm_dmirror"); +		if (IS_ERR_OR_NULL(res)) +			goto err_devmem; +		devmem->pagemap.range.start = res->start; +		devmem->pagemap.range.end = res->end; +		devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; +		break; +	case HMM_DMIRROR_MEMORY_DEVICE_COHERENT: +		devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ? +							spm_addr_dev0 : +							spm_addr_dev1; +		devmem->pagemap.range.end = devmem->pagemap.range.start + +					    DEVMEM_CHUNK_SIZE - 1; +		devmem->pagemap.type = MEMORY_DEVICE_COHERENT; +		break; +	default: +		ret = -EINVAL;  		goto err_devmem; +	} -	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; -	devmem->pagemap.range.start = res->start; -	devmem->pagemap.range.end = res->end;  	devmem->pagemap.nr_range = 1;  	devmem->pagemap.ops = &dmirror_devmem_ops;  	devmem->pagemap.owner = mdevice; @@ -496,10 +549,14 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,  		mdevice->devmem_capacity = new_capacity;  		mdevice->devmem_chunks = new_chunks;  	} -  	ptr = memremap_pages(&devmem->pagemap, numa_node_id()); -	if (IS_ERR(ptr)) +	if (IS_ERR_OR_NULL(ptr)) { +		if (ptr) +			ret = PTR_ERR(ptr); +		else +			ret = -EFAULT;  		goto err_release; +	}  	devmem->mdevice = mdevice;  	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT; @@ -528,30 +585,35 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,  	}  	spin_unlock(&mdevice->lock); -	return true; +	return 0;  err_release:  	mutex_unlock(&mdevice->devmem_lock); -	release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range)); +	if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) +		release_mem_region(devmem->pagemap.range.start, +				   range_len(&devmem->pagemap.range));  err_devmem:  	kfree(devmem); -	return false; +	return ret;  }  static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)  {  	struct page *dpage = NULL; -	struct page *rpage; +	struct page *rpage = NULL;  	/* -	 * This is a fake device so we alloc real system memory to store -	 * our device memory. +	 * For ZONE_DEVICE private type, this is a fake device so we allocate +	 * real system memory to store our device memory. +	 * For ZONE_DEVICE coherent type we use the actual dpage to store the +	 * data and ignore rpage.  	 
*/ -	rpage = alloc_page(GFP_HIGHUSER); -	if (!rpage) -		return NULL; - +	if (dmirror_is_private_zone(mdevice)) { +		rpage = alloc_page(GFP_HIGHUSER); +		if (!rpage) +			return NULL; +	}  	spin_lock(&mdevice->lock);  	if (mdevice->free_pages) { @@ -561,7 +623,7 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)  		spin_unlock(&mdevice->lock);  	} else {  		spin_unlock(&mdevice->lock); -		if (!dmirror_allocate_chunk(mdevice, &dpage)) +		if (dmirror_allocate_chunk(mdevice, &dpage))  			goto error;  	} @@ -570,7 +632,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)  	return dpage;  error: -	__free_page(rpage); +	if (rpage) +		__free_page(rpage);  	return NULL;  } @@ -596,12 +659,16 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,  		 * unallocated pte_none() or read-only zero page.  		 */  		spage = migrate_pfn_to_page(*src); +		if (WARN(spage && is_zone_device_page(spage), +		     "page already in device spage pfn: 0x%lx\n", +		     page_to_pfn(spage))) +			continue;  		dpage = dmirror_devmem_alloc_page(mdevice);  		if (!dpage)  			continue; -		rpage = dpage->zone_device_data; +		rpage = BACKING_PAGE(dpage);  		if (spage)  			copy_highpage(rpage, spage);  		else @@ -615,6 +682,8 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,  		 */  		rpage->zone_device_data = dmirror; +		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n", +			 page_to_pfn(spage), page_to_pfn(dpage));  		*dst = migrate_pfn(page_to_pfn(dpage));  		if ((*src & MIGRATE_PFN_WRITE) ||  		    (!spage && args->vma->vm_flags & VM_WRITE)) @@ -692,11 +761,7 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,  		if (!dpage)  			continue; -		/* -		 * Store the page that holds the data so the page table -		 * doesn't have to deal with ZONE_DEVICE private pages. -		 */ -		entry = dpage->zone_device_data; +		entry = BACKING_PAGE(dpage);  		if (*dst & MIGRATE_PFN_WRITE)  			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);  		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC); @@ -732,7 +797,7 @@ static int dmirror_exclusive(struct dmirror *dmirror,  	mmap_read_lock(mm);  	for (addr = start; addr < end; addr = next) { -		unsigned long mapped; +		unsigned long mapped = 0;  		int i;  		if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)) @@ -741,7 +806,13 @@ static int dmirror_exclusive(struct dmirror *dmirror,  			next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);  		ret = make_device_exclusive_range(mm, addr, next, pages, NULL); -		mapped = dmirror_atomic_map(addr, next, pages, dmirror); +		/* +		 * Do dmirror_atomic_map() iff all pages are marked for +		 * exclusive access to avoid accessing uninitialized +		 * fields of pages. 
+		 */ +		if (ret == (next - addr) >> PAGE_SHIFT) +			mapped = dmirror_atomic_map(addr, next, pages, dmirror);  		for (i = 0; i < ret; i++) {  			if (pages[i]) {  				unlock_page(pages[i]); @@ -776,15 +847,126 @@ static int dmirror_exclusive(struct dmirror *dmirror,  	return ret;  } -static int dmirror_migrate(struct dmirror *dmirror, -			   struct hmm_dmirror_cmd *cmd) +static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, +						      struct dmirror *dmirror) +{ +	const unsigned long *src = args->src; +	unsigned long *dst = args->dst; +	unsigned long start = args->start; +	unsigned long end = args->end; +	unsigned long addr; + +	for (addr = start; addr < end; addr += PAGE_SIZE, +				       src++, dst++) { +		struct page *dpage, *spage; + +		spage = migrate_pfn_to_page(*src); +		if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) +			continue; + +		if (WARN_ON(!is_device_private_page(spage) && +			    !is_device_coherent_page(spage))) +			continue; +		spage = BACKING_PAGE(spage); +		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); +		if (!dpage) +			continue; +		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n", +			 page_to_pfn(spage), page_to_pfn(dpage)); + +		lock_page(dpage); +		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); +		copy_highpage(dpage, spage); +		*dst = migrate_pfn(page_to_pfn(dpage)); +		if (*src & MIGRATE_PFN_WRITE) +			*dst |= MIGRATE_PFN_WRITE; +	} +	return 0; +} + +static unsigned long +dmirror_successful_migrated_pages(struct migrate_vma *migrate) +{ +	unsigned long cpages = 0; +	unsigned long i; + +	for (i = 0; i < migrate->npages; i++) { +		if (migrate->src[i] & MIGRATE_PFN_VALID && +		    migrate->src[i] & MIGRATE_PFN_MIGRATE) +			cpages++; +	} +	return cpages; +} + +static int dmirror_migrate_to_system(struct dmirror *dmirror, +				     struct hmm_dmirror_cmd *cmd)  {  	unsigned long start, end, addr;  	unsigned long size = cmd->npages << PAGE_SHIFT;  	struct mm_struct *mm = dmirror->notifier.mm;  	struct vm_area_struct *vma; -	unsigned long src_pfns[64]; -	unsigned long dst_pfns[64]; +	unsigned long src_pfns[64] = { 0 }; +	unsigned long dst_pfns[64] = { 0 }; +	struct migrate_vma args; +	unsigned long next; +	int ret; + +	start = cmd->addr; +	end = start + size; +	if (end < start) +		return -EINVAL; + +	/* Since the mm is for the mirrored process, get a reference first. 
*/ +	if (!mmget_not_zero(mm)) +		return -EINVAL; + +	cmd->cpages = 0; +	mmap_read_lock(mm); +	for (addr = start; addr < end; addr = next) { +		vma = vma_lookup(mm, addr); +		if (!vma || !(vma->vm_flags & VM_READ)) { +			ret = -EINVAL; +			goto out; +		} +		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT)); +		if (next > vma->vm_end) +			next = vma->vm_end; + +		args.vma = vma; +		args.src = src_pfns; +		args.dst = dst_pfns; +		args.start = addr; +		args.end = next; +		args.pgmap_owner = dmirror->mdevice; +		args.flags = dmirror_select_device(dmirror); + +		ret = migrate_vma_setup(&args); +		if (ret) +			goto out; + +		pr_debug("Migrating from device mem to sys mem\n"); +		dmirror_devmem_fault_alloc_and_copy(&args, dmirror); + +		migrate_vma_pages(&args); +		cmd->cpages += dmirror_successful_migrated_pages(&args); +		migrate_vma_finalize(&args); +	} +out: +	mmap_read_unlock(mm); +	mmput(mm); + +	return ret; +} + +static int dmirror_migrate_to_device(struct dmirror *dmirror, +				struct hmm_dmirror_cmd *cmd) +{ +	unsigned long start, end, addr; +	unsigned long size = cmd->npages << PAGE_SHIFT; +	struct mm_struct *mm = dmirror->notifier.mm; +	struct vm_area_struct *vma; +	unsigned long src_pfns[64] = { 0 }; +	unsigned long dst_pfns[64] = { 0 };  	struct dmirror_bounce bounce;  	struct migrate_vma args;  	unsigned long next; @@ -821,6 +1003,7 @@ static int dmirror_migrate(struct dmirror *dmirror,  		if (ret)  			goto out; +		pr_debug("Migrating from sys mem to device mem\n");  		dmirror_migrate_alloc_and_copy(&args, dmirror);  		migrate_vma_pages(&args);  		dmirror_migrate_finalize_and_map(&args, dmirror); @@ -829,7 +1012,10 @@ static int dmirror_migrate(struct dmirror *dmirror,  	mmap_read_unlock(mm);  	mmput(mm); -	/* Return the migrated data for verification. */ +	/* +	 * Return the migrated data for verification. +	 * Only for pages in device zone +	 */  	ret = dmirror_bounce_init(&bounce, start, size);  	if (ret)  		return ret; @@ -872,6 +1058,12 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,  			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;  		else  			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE; +	} else if (is_device_coherent_page(page)) { +		/* Is the page migrated to this device or some other? 
*/ +		if (dmirror->mdevice == dmirror_page_to_device(page)) +			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL; +		else +			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;  	} else if (is_zero_pfn(page_to_pfn(page)))  		*perm = HMM_DMIRROR_PROT_ZERO;  	else @@ -1059,8 +1251,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,  		ret = dmirror_write(dmirror, &cmd);  		break; -	case HMM_DMIRROR_MIGRATE: -		ret = dmirror_migrate(dmirror, &cmd); +	case HMM_DMIRROR_MIGRATE_TO_DEV: +		ret = dmirror_migrate_to_device(dmirror, &cmd); +		break; + +	case HMM_DMIRROR_MIGRATE_TO_SYS: +		ret = dmirror_migrate_to_system(dmirror, &cmd);  		break;  	case HMM_DMIRROR_EXCLUSIVE: @@ -1122,14 +1318,13 @@ static const struct file_operations dmirror_fops = {  static void dmirror_devmem_free(struct page *page)  { -	struct page *rpage = page->zone_device_data; +	struct page *rpage = BACKING_PAGE(page);  	struct dmirror_device *mdevice; -	if (rpage) +	if (rpage != page)  		__free_page(rpage);  	mdevice = dmirror_page_to_device(page); -  	spin_lock(&mdevice->lock);  	mdevice->cfree++;  	page->zone_device_data = mdevice->free_pages; @@ -1137,43 +1332,11 @@ static void dmirror_devmem_free(struct page *page)  	spin_unlock(&mdevice->lock);  } -static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, -						      struct dmirror *dmirror) -{ -	const unsigned long *src = args->src; -	unsigned long *dst = args->dst; -	unsigned long start = args->start; -	unsigned long end = args->end; -	unsigned long addr; - -	for (addr = start; addr < end; addr += PAGE_SIZE, -				       src++, dst++) { -		struct page *dpage, *spage; - -		spage = migrate_pfn_to_page(*src); -		if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) -			continue; -		spage = spage->zone_device_data; - -		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr); -		if (!dpage) -			continue; - -		lock_page(dpage); -		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT); -		copy_highpage(dpage, spage); -		*dst = migrate_pfn(page_to_pfn(dpage)); -		if (*src & MIGRATE_PFN_WRITE) -			*dst |= MIGRATE_PFN_WRITE; -	} -	return 0; -} -  static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)  {  	struct migrate_vma args; -	unsigned long src_pfns; -	unsigned long dst_pfns; +	unsigned long src_pfns = 0; +	unsigned long dst_pfns = 0;  	struct page *rpage;  	struct dmirror *dmirror;  	vm_fault_t ret; @@ -1193,7 +1356,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)  	args.src = &src_pfns;  	args.dst = &dst_pfns;  	args.pgmap_owner = dmirror->mdevice; -	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; +	args.flags = dmirror_select_device(dmirror);  	if (migrate_vma_setup(&args))  		return VM_FAULT_SIGBUS; @@ -1231,10 +1394,8 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)  	if (ret)  		return ret; -	/* Build a list of free ZONE_DEVICE private struct pages */ -	dmirror_allocate_chunk(mdevice, NULL); - -	return 0; +	/* Build a list of free ZONE_DEVICE struct pages */ +	return dmirror_allocate_chunk(mdevice, NULL);  }  static void dmirror_device_remove(struct dmirror_device *mdevice) @@ -1247,8 +1408,9 @@ static void dmirror_device_remove(struct dmirror_device *mdevice)  				mdevice->devmem_chunks[i];  			memunmap_pages(&devmem->pagemap); -			release_mem_region(devmem->pagemap.range.start, -					   range_len(&devmem->pagemap.range)); +			if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE) +				release_mem_region(devmem->pagemap.range.start, +						   range_len(&devmem->pagemap.range));  			kfree(devmem);  		}  		
kfree(mdevice->devmem_chunks); @@ -1260,14 +1422,26 @@ static void dmirror_device_remove(struct dmirror_device *mdevice)  static int __init hmm_dmirror_init(void)  {  	int ret; -	int id; +	int id = 0; +	int ndevices = 0;  	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,  				  "HMM_DMIRROR");  	if (ret)  		goto err_unreg; -	for (id = 0; id < DMIRROR_NDEVICES; id++) { +	memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0])); +	dmirror_devices[ndevices++].zone_device_type = +				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE; +	dmirror_devices[ndevices++].zone_device_type = +				HMM_DMIRROR_MEMORY_DEVICE_PRIVATE; +	if (spm_addr_dev0 && spm_addr_dev1) { +		dmirror_devices[ndevices++].zone_device_type = +					HMM_DMIRROR_MEMORY_DEVICE_COHERENT; +		dmirror_devices[ndevices++].zone_device_type = +					HMM_DMIRROR_MEMORY_DEVICE_COHERENT; +	} +	for (id = 0; id < ndevices; id++) {  		ret = dmirror_device_init(dmirror_devices + id, id);  		if (ret)  			goto err_chrdev; @@ -1289,7 +1463,8 @@ static void __exit hmm_dmirror_exit(void)  	int id;  	for (id = 0; id < DMIRROR_NDEVICES; id++) -		dmirror_device_remove(dmirror_devices + id); +		if (dmirror_devices[id].zone_device_type) +			dmirror_device_remove(dmirror_devices + id);  	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);  } diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h index f14dea5dcd06..e31d58c9034a 100644 --- a/lib/test_hmm_uapi.h +++ b/lib/test_hmm_uapi.h @@ -31,10 +31,11 @@ struct hmm_dmirror_cmd {  /* Expose the address space of the calling process through hmm device file */  #define HMM_DMIRROR_READ		_IOWR('H', 0x00, struct hmm_dmirror_cmd)  #define HMM_DMIRROR_WRITE		_IOWR('H', 0x01, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_MIGRATE		_IOWR('H', 0x02, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x04, struct hmm_dmirror_cmd) -#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x05, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_MIGRATE_TO_DEV	_IOWR('H', 0x02, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_MIGRATE_TO_SYS	_IOWR('H', 0x03, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x04, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x05, struct hmm_dmirror_cmd) +#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x06, struct hmm_dmirror_cmd)  /*   * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT. 
@@ -49,6 +50,8 @@ struct hmm_dmirror_cmd {   *					device the ioctl() is made   * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some   *					other device + * HMM_DMIRROR_PROT_DEV_COHERENT: Migrate device coherent page on the device + *				  the ioctl() is made   */  enum {  	HMM_DMIRROR_PROT_ERROR			= 0xFF, @@ -60,6 +63,14 @@ enum {  	HMM_DMIRROR_PROT_ZERO			= 0x10,  	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,  	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30, +	HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL	= 0x40, +	HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE	= 0x50, +}; + +enum { +	/* 0 is reserved to catch uninitialized type fields */ +	HMM_DMIRROR_MEMORY_DEVICE_PRIVATE = 1, +	HMM_DMIRROR_MEMORY_DEVICE_COHERENT,  };  #endif /* _LIB_TEST_HMM_UAPI_H */ diff --git a/lib/test_kasan.c b/lib/test_kasan.c index c233b1a4e984..58c1b01ccfe2 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -131,6 +131,7 @@ static void kmalloc_oob_right(struct kunit *test)  	ptr = kmalloc(size, GFP_KERNEL);  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); +	OPTIMIZER_HIDE_VAR(ptr);  	/*  	 * An unaligned access past the requested kmalloc size.  	 * Only generic KASAN can precisely detect these. @@ -159,6 +160,7 @@ static void kmalloc_oob_left(struct kunit *test)  	ptr = kmalloc(size, GFP_KERNEL);  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); +	OPTIMIZER_HIDE_VAR(ptr);  	KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1));  	kfree(ptr);  } @@ -171,6 +173,7 @@ static void kmalloc_node_oob_right(struct kunit *test)  	ptr = kmalloc_node(size, GFP_KERNEL, 0);  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); +	OPTIMIZER_HIDE_VAR(ptr);  	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);  	kfree(ptr);  } @@ -191,6 +194,7 @@ static void kmalloc_pagealloc_oob_right(struct kunit *test)  	ptr = kmalloc(size, GFP_KERNEL);  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); +	OPTIMIZER_HIDE_VAR(ptr);  	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0);  	kfree(ptr); @@ -271,6 +275,7 @@ static void kmalloc_large_oob_right(struct kunit *test)  	ptr = kmalloc(size, GFP_KERNEL);  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); +	OPTIMIZER_HIDE_VAR(ptr);  	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);  	kfree(ptr);  } @@ -410,6 +415,8 @@ static void kmalloc_oob_16(struct kunit *test)  	ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); +	OPTIMIZER_HIDE_VAR(ptr1); +	OPTIMIZER_HIDE_VAR(ptr2);  	KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2);  	kfree(ptr1);  	kfree(ptr2); @@ -756,6 +763,8 @@ static void ksize_unpoisons_memory(struct kunit *test)  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);  	real_size = ksize(ptr); +	OPTIMIZER_HIDE_VAR(ptr); +  	/* This access shouldn't trigger a KASAN report. 
*/  	ptr[size] = 'x'; @@ -778,6 +787,7 @@ static void ksize_uaf(struct kunit *test)  	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);  	kfree(ptr); +	OPTIMIZER_HIDE_VAR(ptr);  	KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr));  	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);  	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]); diff --git a/lib/test_printf.c b/lib/test_printf.c index 07309c45f327..4bd15a593fbd 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -30,6 +30,12 @@  #define PAD_SIZE 16  #define FILL_CHAR '$' +#define NOWARN(option, comment, block) \ +	__diag_push(); \ +	__diag_ignore_all(#option, comment); \ +	block \ +	__diag_pop(); +  KSTM_MODULE_GLOBALS();  static char *test_buffer __initdata; @@ -78,12 +84,17 @@ do_test(int bufsize, const char *expect, int elen,  		return 1;  	} -	if (memchr_inv(test_buffer + written + 1, FILL_CHAR, BUF_SIZE + PAD_SIZE - (written + 1))) { +	if (memchr_inv(test_buffer + written + 1, FILL_CHAR, bufsize - (written + 1))) {  		pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote beyond the nul-terminator\n",  			bufsize, fmt);  		return 1;  	} +	if (memchr_inv(test_buffer + bufsize, FILL_CHAR, BUF_SIZE + PAD_SIZE - bufsize)) { +		pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote beyond buffer\n", bufsize, fmt); +		return 1; +	} +  	if (memcmp(test_buffer, expect, written)) {  		pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected '%.*s'\n",  			bufsize, fmt, test_buffer, written, expect); @@ -154,9 +165,11 @@ test_number(void)  	test("0x1234abcd  ", "%#-12x", 0x1234abcd);  	test("  0x1234abcd", "%#12x", 0x1234abcd);  	test("0|001| 12|+123| 1234|-123|-1234", "%d|%03d|%3d|%+d|% d|%+d|% d", 0, 1, 12, 123, 1234, -123, -1234); -	test("0|1|1|128|255", "%hhu|%hhu|%hhu|%hhu|%hhu", 0, 1, 257, 128, -1); -	test("0|1|1|-128|-1", "%hhd|%hhd|%hhd|%hhd|%hhd", 0, 1, 257, 128, -1); -	test("2015122420151225", "%ho%ho%#ho", 1037, 5282, -11627); +	NOWARN(-Wformat, "Intentionally test narrowing conversion specifiers.", { +		test("0|1|1|128|255", "%hhu|%hhu|%hhu|%hhu|%hhu", 0, 1, 257, 128, -1); +		test("0|1|1|-128|-1", "%hhd|%hhd|%hhd|%hhd|%hhd", 0, 1, 257, 128, -1); +		test("2015122420151225", "%ho%ho%#ho", 1037, 5282, -11627); +	})  	/*  	 * POSIX/C99: »The result of converting zero with an explicit  	 * precision of zero shall be no characters.« Hence the output diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index cf41fd6df42a..4f2f2d1bac56 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -74,12 +74,13 @@ test_report_one_done(void)  static int random_size_align_alloc_test(void)  { -	unsigned long size, align, rnd; +	unsigned long size, align; +	unsigned int rnd;  	void *ptr;  	int i;  	for (i = 0; i < test_loop_count; i++) { -		get_random_bytes(&rnd, sizeof(rnd)); +		rnd = prandom_u32();  		/*  		 * Maximum 1024 pages, if PAGE_SIZE is 4096. 
@@ -150,7 +151,7 @@ static int random_size_alloc_test(void)  	int i;  	for (i = 0; i < test_loop_count; i++) { -		get_random_bytes(&n, sizeof(i)); +		n = prandom_u32();  		n = (n % 100) + 1;  		p = vmalloc(n * PAGE_SIZE); @@ -294,14 +295,14 @@ pcpu_alloc_test(void)  	for (i = 0; i < 35000; i++) {  		unsigned int r; -		get_random_bytes(&r, sizeof(i)); +		r = prandom_u32();  		size = (r % (PAGE_SIZE / 4)) + 1;  		/*  		 * Maximum PAGE_SIZE  		 */ -		get_random_bytes(&r, sizeof(i)); -		align = 1 << ((i % 11) + 1); +		r = prandom_u32(); +		align = 1 << ((r % 11) + 1);  		pcpu[i] = __alloc_percpu(size, align);  		if (!pcpu[i]) @@ -396,7 +397,7 @@ static void shuffle_array(int *arr, int n)  	int i, j;  	for (i = n - 1; i > 0; i--)  { -		get_random_bytes(&rnd, sizeof(rnd)); +		rnd = prandom_u32();  		/* Cut the range. */  		j = rnd % i; diff --git a/lib/trace_readwrite.c b/lib/trace_readwrite.c new file mode 100644 index 000000000000..88637038b30c --- /dev/null +++ b/lib/trace_readwrite.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Register read and write tracepoints + * + * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#include <linux/ftrace.h> +#include <linux/module.h> +#include <asm-generic/io.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/rwmmio.h> + +#ifdef CONFIG_TRACE_MMIO_ACCESS +void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, +		    unsigned long caller_addr) +{ +	trace_rwmmio_write(caller_addr, val, width, addr); +} +EXPORT_SYMBOL_GPL(log_write_mmio); +EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_write); + +void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, +			 unsigned long caller_addr) +{ +	trace_rwmmio_post_write(caller_addr, val, width, addr); +} +EXPORT_SYMBOL_GPL(log_post_write_mmio); +EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_write); + +void log_read_mmio(u8 width, const volatile void __iomem *addr, +		   unsigned long caller_addr) +{ +	trace_rwmmio_read(caller_addr, width, addr); +} +EXPORT_SYMBOL_GPL(log_read_mmio); +EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_read); + +void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, +			unsigned long caller_addr) +{ +	trace_rwmmio_post_read(caller_addr, val, width, addr); +} +EXPORT_SYMBOL_GPL(log_post_read_mmio); +EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_read); +#endif /* CONFIG_TRACE_MMIO_ACCESS */ diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 4cf250031f0f..1f2234221dd1 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -80,7 +80,7 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)  			/* London calling... */  			DEBUGP("found!\n"); -			return consumed += (shift-(bm->patlen-1)); +			return consumed + (shift-(bm->patlen-1));  next:			bs = bm->bad_shift[text[shift-i]];  | 
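The test_hmm_uapi.h hunk above splits the old HMM_DMIRROR_MIGRATE request into HMM_DMIRROR_MIGRATE_TO_DEV and HMM_DMIRROR_MIGRATE_TO_SYS and renumbers the snapshot/exclusive requests after them, so userspace must be rebuilt against the new header. The sketch below is only an illustration of driving the two renumbered requests, not code from this change: the /dev/hmm_dmirror0 node name and any struct hmm_dmirror_cmd fields beyond addr, npages and cpages (the ones visible in this diff) are assumptions borrowed from the in-tree selftests, which remain the canonical consumer.

	/*
	 * Hypothetical userspace sketch, assuming CONFIG_TEST_HMM=m and the
	 * dmirror character device exposed as /dev/hmm_dmirror0.  Only the
	 * request codes come from this diff; everything else is illustrative.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#include "test_hmm_uapi.h"	/* HMM_DMIRROR_MIGRATE_TO_DEV/_TO_SYS */

	int main(void)
	{
		struct hmm_dmirror_cmd cmd;
		unsigned long npages = 4;
		size_t size = npages * sysconf(_SC_PAGESIZE);
		void *buf;
		int fd, ret;

		fd = open("/dev/hmm_dmirror0", O_RDWR);	/* node name is an assumption */
		if (fd < 0)
			return EXIT_FAILURE;

		buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (buf == MAP_FAILED)
			return EXIT_FAILURE;
		memset(buf, 0xab, size);

		memset(&cmd, 0, sizeof(cmd));
		cmd.addr = (unsigned long)buf;
		cmd.npages = npages;

		/* Pull the anonymous pages into (fake) device memory ... */
		ret = ioctl(fd, HMM_DMIRROR_MIGRATE_TO_DEV, &cmd);
		if (ret == 0)
			printf("migrated %llu pages to device\n",
			       (unsigned long long)cmd.cpages);

		/* ... and back to system memory with the new reverse request. */
		ret = ioctl(fd, HMM_DMIRROR_MIGRATE_TO_SYS, &cmd);
		if (ret == 0)
			printf("migrated %llu pages back to system memory\n",
			       (unsigned long long)cmd.cpages);

		munmap(buf, size);
		close(fd);
		return ret ? EXIT_FAILURE : EXIT_SUCCESS;
	}

Splitting the request codes is also why dmirror_fops_unlocked_ioctl() now dispatches to dmirror_migrate_to_device() and dmirror_migrate_to_system() separately instead of the single dmirror_migrate() helper.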

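The new lib/trace_readwrite.c only supplies the bodies behind the four rwmmio tracepoints; CONFIG_TRACE_MMIO_ACCESS additionally depends on ARCH_HAVE_TRACE_MMIO_ACCESS, so the architecture's I/O accessors are expected to call the log_*_mmio() helpers around each access. The sketch below shows one way such a hook-up could look, using the four-argument signatures from this file. my_writel() is a hypothetical wrapper used purely for illustration (this diff is limited to lib/, so the real accessor wiring is not shown), and the snippet assumes a tree built with CONFIG_TRACE_MMIO_ACCESS=y where these prototypes are visible from the io headers.

	/*
	 * Hypothetical arch-side sketch: feed the rwmmio tracepoints from a
	 * 32-bit MMIO write wrapper.  Prototypes mirror lib/trace_readwrite.c.
	 */
	#include <linux/io.h>
	#include <linux/kernel.h>

	void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
			    unsigned long caller_addr);
	void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
				 unsigned long caller_addr);

	static inline void my_writel(u32 val, volatile void __iomem *addr)
	{
		/* Log the intent to write, tagged with the caller's return address. */
		log_write_mmio(val, 32, addr, _RET_IP_);

		__raw_writel(val, addr);	/* byte-order handling omitted in this sketch */

		/* Log completion, so the pre/post pair brackets the actual access. */
		log_post_write_mmio(val, 32, addr, _RET_IP_);
	}

Once an architecture is wired up this way, the events are consumed like any other tracepoint group (the rwmmio events under the tracing events directory), giving a log of every instrumented MMIO read and write together with the calling code's address.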