From 5e21f2d577cf174ced5fe9bdff67dcb70190d9f8 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 7 Jun 2022 11:34:49 +0200 Subject: lib/test_vmalloc: switch to prandom_u32() A get_random_bytes() function can cause a high contention if it is called across CPUs simultaneously. Because it shares one lock per all CPUs: class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg &crng->lock: 663145 665886 0.05 8.85 261966.66 0.39 7188152 13731279 0.04 11.89 2181582.30 0.16 ----------- &crng->lock 307835 [<00000000acba59cd>] _extract_crng+0x48/0x90 &crng->lock 358051 [<00000000f0075abc>] _crng_backtrack_protect+0x32/0x90 ----------- &crng->lock 234241 [<00000000f0075abc>] _crng_backtrack_protect+0x32/0x90 &crng->lock 431645 [<00000000acba59cd>] _extract_crng+0x48/0x90 Switch from the get_random_bytes() to prandom_u32() that does not have any internal contention when a random value is needed for the tests. The reason is to minimize CPU cycles introduced by the test-suite itself from the vmalloc performance metrics. Link: https://lkml.kernel.org/r/20220607093449.3100-6-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib/test_vmalloc.c') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index cf41fd6df42a..4f2f2d1bac56 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -74,12 +74,13 @@ test_report_one_done(void) static int random_size_align_alloc_test(void) { - unsigned long size, align, rnd; + unsigned long size, align; + unsigned int rnd; void *ptr; int i; for (i = 0; i < test_loop_count; i++) { - get_random_bytes(&rnd, sizeof(rnd)); + rnd = prandom_u32(); /* * Maximum 1024 pages, if PAGE_SIZE is 4096. @@ -150,7 +151,7 @@ static int random_size_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - get_random_bytes(&n, sizeof(i)); + n = prandom_u32(); n = (n % 100) + 1; p = vmalloc(n * PAGE_SIZE); @@ -294,14 +295,14 @@ pcpu_alloc_test(void) for (i = 0; i < 35000; i++) { unsigned int r; - get_random_bytes(&r, sizeof(i)); + r = prandom_u32(); size = (r % (PAGE_SIZE / 4)) + 1; /* * Maximum PAGE_SIZE */ - get_random_bytes(&r, sizeof(i)); - align = 1 << ((i % 11) + 1); + r = prandom_u32(); + align = 1 << ((r % 11) + 1); pcpu[i] = __alloc_percpu(size, align); if (!pcpu[i]) @@ -396,7 +397,7 @@ static void shuffle_array(int *arr, int n) int i, j; for (i = n - 1; i > 0; i--) { - get_random_bytes(&rnd, sizeof(rnd)); + rnd = prandom_u32(); /* Cut the range. */ j = rnd % i; -- cgit From 8b3ccbc1f1f91847160951aa15dd27c22dddcb49 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 5 Oct 2022 16:43:38 +0200 Subject: treewide: use prandom_u32_max() when possible, part 2 Rather than incurring a division or requesting too many random bytes for the given range, use the prandom_u32_max() function, which only takes the minimum required bytes from the RNG and avoids divisions. This was done by hand, covering things that coccinelle could not do on its own. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Reviewed-by: Yury Norov Reviewed-by: Jan Kara # for ext2, ext4, and sbitmap Acked-by: Jakub Kicinski Signed-off-by: Jason A. Donenfeld --- lib/test_vmalloc.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'lib/test_vmalloc.c') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 4f2f2d1bac56..a26bbbf20e62 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -151,9 +151,7 @@ static int random_size_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - n = prandom_u32(); - n = (n % 100) + 1; - + n = prandom_u32_max(100) + 1; p = vmalloc(n * PAGE_SIZE); if (!p) @@ -293,16 +291,12 @@ pcpu_alloc_test(void) return -1; for (i = 0; i < 35000; i++) { - unsigned int r; - - r = prandom_u32(); - size = (r % (PAGE_SIZE / 4)) + 1; + size = prandom_u32_max(PAGE_SIZE / 4) + 1; /* * Maximum PAGE_SIZE */ - r = prandom_u32(); - align = 1 << ((r % 11) + 1); + align = 1 << (prandom_u32_max(11) + 1); pcpu[i] = __alloc_percpu(size, align); if (!pcpu[i]) @@ -393,14 +387,11 @@ static struct test_driver { static void shuffle_array(int *arr, int n) { - unsigned int rnd; int i, j; for (i = n - 1; i > 0; i--) { - rnd = prandom_u32(); - /* Cut the range. */ - j = rnd % i; + j = prandom_u32_max(i); /* Swap indexes. */ swap(arr[i], arr[j]); -- cgit From f743f16c548b1a2633e8b6034058d6475d7f26a3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 5 Oct 2022 17:23:53 +0200 Subject: treewide: use get_random_{u8,u16}() when possible, part 2 Rather than truncate a 32-bit value to a 16-bit value or an 8-bit value, simply use the get_random_{u8,u16}() functions, which are faster than wasting the additional bytes from a 32-bit value. This was done by hand, identifying all of the places where one of the random integer functions was used in a non-32-bit context. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Reviewed-by: Yury Norov Acked-by: Jakub Kicinski Acked-by: Heiko Carstens # for s390 Signed-off-by: Jason A. Donenfeld --- lib/test_vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/test_vmalloc.c') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index a26bbbf20e62..cf7780572f5b 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -80,7 +80,7 @@ static int random_size_align_alloc_test(void) int i; for (i = 0; i < test_loop_count; i++) { - rnd = prandom_u32(); + rnd = get_random_u8(); /* * Maximum 1024 pages, if PAGE_SIZE is 4096. -- cgit