Merge tag 'percpu-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu

Pull percpu updates from Dennis Zhou: "One bigger change to percpu_counter's api allowing for init and destroy of multiple counters via percpu_counter_init_many() and percpu_counter_destroy_many(). This is used to help begin remediating a performance regression with percpu rss stats. Additionally, it seems larger core count machines are feeling the burden of the single threaded allocation of percpu. Mateusz is thinking about it and I will spend some time on it too. percpu: - A couple cleanups by Baoquan He and Bibo Mao. The only behavior change is to start printing messages if we're under the warn limit for failed atomic allocations. percpu_counter: - Shakeel introduced percpu counters into mm_struct which caused percpu allocations be on the hot path [1]. Originally I spent some time trying to improve the percpu allocator, but instead preferred what Mateusz Guzik proposed grouping at the allocation site, percpu_counter_init_many(). This allows a single percpu allocation to be shared by the counters. I like this approach because it creates a shared lifetime by the allocations. Additionally, I believe many inits have higher level synchronization requirements, like percpu_counter does against HOTPLUG_CPU. Therefore we can group these optimizations together" Link: https://lore.kernel.org/linux-mm/20221024052841.3291983-1-shakeelb@google.com/ [1] * tag 'percpu-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu: kernel/fork: group allocation/free of per-cpu counters for mm struct pcpcntr: add group allocation/free mm/percpu.c: print error message too if atomic alloc failed mm/percpu.c: optimize the code in pcpu_setup_first_chunk() a little bit mm/percpu.c: remove redundant check mm/percpu: Remove some local variables in pcpu_populate_pte
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-09-01 15:44:45 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-09-01 15:44:45 -0700
commit: e987af4546ac5de50e514182c1d0ca33843fa665 (patch)
tree: ed2fa9e146d4cd58ce35130e6dd08a00b2cb7a38 /mm
parent: 0fe2b86c21253bb365947ceed3531eb214d4c5b5 (diff)
parent: 14ef95be6f5558fb9e43aaf06ef9a1d6e0cae6c8 (diff)
1 files changed, 28 insertions, 41 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 28e07ede46f6..a7665de8485f 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1890,13 +1890,15 @@ fail_unlock:
 fail:
 	trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
 
-	if (!is_atomic && do_warn && warn_limit) {
+	if (do_warn && warn_limit) {
 		pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
 			size, align, is_atomic, err);
-		dump_stack();
+		if (!is_atomic)
+			dump_stack();
 		if (!--warn_limit)
 			pr_info("limit reached, disable warning\n");
 	}
+
 	if (is_atomic) {
 		/* see the flag handling in pcpu_balance_workfn() */
 		pcpu_atomic_alloc_failed = true;
@@ -2581,14 +2583,12 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 {
 	size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	size_t static_size, dyn_size;
-	struct pcpu_chunk *chunk;
 	unsigned long *group_offsets;
 	size_t *group_sizes;
 	unsigned long *unit_off;
 	unsigned int cpu;
 	int *unit_map;
 	int group, unit, i;
-	int map_size;
 	unsigned long tmp_addr;
 	size_t alloc_size;
 
@@ -2615,7 +2615,6 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
 	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
 	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
-	PCPU_SETUP_BUG_ON(!ai->dyn_size);
 	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
 	PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
 			    IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
@@ -2698,7 +2697,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
 	pcpu_atom_size = ai->atom_size;
-	pcpu_chunk_struct_size = struct_size(chunk, populated,
+	pcpu_chunk_struct_size = struct_size((struct pcpu_chunk *)0, populated,
 					     BITS_TO_LONGS(pcpu_unit_pages));
 
 	pcpu_stats_save_ai(ai);
@@ -2735,29 +2734,23 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	dyn_size = ai->dyn_size - (static_size - ai->static_size);
 
 	/*
-	 * Initialize first chunk.
-	 * If the reserved_size is non-zero, this initializes the reserved
-	 * chunk.  If the reserved_size is zero, the reserved chunk is NULL
-	 * and the dynamic region is initialized here.  The first chunk,
-	 * pcpu_first_chunk, will always point to the chunk that serves
-	 * the dynamic region.
+	 * Initialize first chunk:
+	 * This chunk is broken up into 3 parts:
+	 *		< static | [reserved] | dynamic >
+	 * - static - there is no backing chunk because these allocations can
+	 *   never be freed.
+	 * - reserved (pcpu_reserved_chunk) - exists primarily to serve
+	 *   allocations from module load.
+	 * - dynamic (pcpu_first_chunk) - serves the dynamic part of the first
+	 *   chunk.
 	 */
 	tmp_addr = (unsigned long)base_addr + static_size;
-	map_size = ai->reserved_size ?: dyn_size;
-	chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
-
-	/* init dynamic chunk if necessary */
-	if (ai->reserved_size) {
-		pcpu_reserved_chunk = chunk;
+	if (ai->reserved_size)
+		pcpu_reserved_chunk = pcpu_alloc_first_chunk(tmp_addr,
+						ai->reserved_size);
+	tmp_addr = (unsigned long)base_addr + static_size + ai->reserved_size;
+	pcpu_first_chunk = pcpu_alloc_first_chunk(tmp_addr, dyn_size);
 
-		tmp_addr = (unsigned long)base_addr + static_size +
-			   ai->reserved_size;
-		map_size = dyn_size;
-		chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
-	}
-
-	/* link the first chunk in */
-	pcpu_first_chunk = chunk;
 	pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
 	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
@@ -3189,32 +3182,26 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
 	pmd_t *pmd;
 
 	if (pgd_none(*pgd)) {
-		p4d_t *new;
-
-		new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
-		if (!new)
+		p4d = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
+		if (!p4d)
 			goto err_alloc;
-		pgd_populate(&init_mm, pgd, new);
+		pgd_populate(&init_mm, pgd, p4d);
 	}
 
 	p4d = p4d_offset(pgd, addr);
 	if (p4d_none(*p4d)) {
-		pud_t *new;
-
-		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
-		if (!new)
+		pud = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+		if (!pud)
 			goto err_alloc;
-		p4d_populate(&init_mm, p4d, new);
+		p4d_populate(&init_mm, p4d, pud);
 	}
 
 	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
-		if (!new)
+		pmd = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+		if (!pmd)
 			goto err_alloc;
-		pud_populate(&init_mm, pud, new);
+		pud_populate(&init_mm, pud, pmd);
 	}
 
 	pmd = pmd_offset(pud, addr);
author	Linus Torvalds <torvalds@linux-foundation.org>	2023-09-01 15:44:45 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2023-09-01 15:44:45 -0700
commit	e987af4546ac5de50e514182c1d0ca33843fa665 (patch)
tree	ed2fa9e146d4cd58ce35130e6dd08a00b2cb7a38 /mm
parent	0fe2b86c21253bb365947ceed3531eb214d4c5b5 (diff)
parent	14ef95be6f5558fb9e43aaf06ef9a1d6e0cae6c8 (diff)