author    David Woodhouse <dwmw@amazon.co.uk>  2025-04-23 14:33:37 +0100
committer Andrew Morton <akpm@linux-foundation.org>  2025-05-12 23:50:43 -0700
commit    f88ce2c84a341f44a7d00bc10868714bc4751f7e
tree      5528cda9200653b903c5a691e1745d36d0d7846b
parent    a3d2e34dce2041cf6994919430e75e5eafb99bcd
mm: introduce for_each_valid_pfn() and use it from reserve_bootmem_region()
Patch series "mm: Introduce for_each_valid_pfn()", v4.

There are cases where a naïve loop over a PFN range, calling pfn_valid()
on each one, is horribly inefficient. Ruihan Li reported the case where
memmap_init() iterates all the way from zero to a potentially large value
of ARCH_PFN_OFFSET, and we at Amazon found the reserve_bootmem_region()
one as it affects hypervisor live update. Others are more cosmetic.

By introducing a for_each_valid_pfn() helper it can optimise away a lot
of pointless calls to pfn_valid(), skipping immediately to the next valid
PFN and also skipping *all* checks within a valid (sub)region according
to the granularity of the memory model in use.

This patch (of 7):

Especially since commit 9092d4f7a1f8 ("memblock: update initialization of
reserved pages"), the reserve_bootmem_region() function can spend a
significant amount of time iterating over every 4KiB PFN in a range,
calling pfn_valid() on each one, and ultimately doing absolutely nothing.

On a platform used for virtualization, with large NOMAP regions that
eventually get used for guest RAM, this leads to a significant increase
in steal time experienced during kexec for a live update.

Introduce for_each_valid_pfn() and use it from reserve_bootmem_region().
This implementation is precisely the same naïve loop that the function
used to have, but subsequent commits will provide optimised versions for
FLATMEM and SPARSEMEM, and this version will remain for those
architectures which provide their own pfn_valid() implementation,
until/unless they also provide a matching for_each_valid_pfn().

Link: https://lkml.kernel.org/r/20250423133821.789413-1-dwmw2@infradead.org
Link: https://lkml.kernel.org/r/20250423133821.789413-2-dwmw2@infradead.org
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Ruihan Li <lrh2000@pku.edu.cn>
Cc: Will Deacon <will@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
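As a usage illustration (a sketch only, not part of this patch;
do_something() is a hypothetical stand-in for whatever per-page work a
caller performs), the conversion pattern the series enables looks like
this:

	/*
	 * Before: every PFN in [start_pfn, end_pfn) pays a pfn_valid()
	 * call, even across large invalid (e.g. NOMAP) holes.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		if (!pfn_valid(pfn))
			continue;
		do_something(pfn_to_page(pfn));
	}

	/*
	 * After: an optimised for_each_valid_pfn() can step straight to
	 * the next valid PFN, and skip the per-PFN checks entirely
	 * inside a known-valid (sub)region.
	 */
	for_each_valid_pfn(pfn, start_pfn, end_pfn)
		do_something(pfn_to_page(pfn));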
-rw-r--r--  include/linux/mmzone.h  10
-rw-r--r--  mm/mm_init.c            23
2 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ccec1bf2896..230a29c2ed1a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2177,6 +2177,16 @@ void sparse_init(void);
 #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * Fallback case for when the architecture provides its own pfn_valid() but
+ * not a corresponding for_each_valid_pfn().
+ */
+#ifndef for_each_valid_pfn
+#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \
+	for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \
+		if (pfn_valid(_pfn))
+#endif
+
 #endif /* !__GENERATING_BOUNDS.H */
 #endif /* !__ASSEMBLY__ */
 #endif /* _LINUX_MMZONE_H */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index b35006d9d49d..7191703a5820 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -783,22 +783,19 @@ void __meminit init_deferred_page(unsigned long pfn, int nid)
 void __meminit reserve_bootmem_region(phys_addr_t start,
 				      phys_addr_t end, int nid)
 {
-	unsigned long start_pfn = PFN_DOWN(start);
-	unsigned long end_pfn = PFN_UP(end);
+	unsigned long pfn;
 
-	for (; start_pfn < end_pfn; start_pfn++) {
-		if (pfn_valid(start_pfn)) {
-			struct page *page = pfn_to_page(start_pfn);
+	for_each_valid_pfn(pfn, PFN_DOWN(start), PFN_UP(end)) {
+		struct page *page = pfn_to_page(pfn);
 
-			__init_deferred_page(start_pfn, nid);
+		__init_deferred_page(pfn, nid);
 
-			/*
-			 * no need for atomic set_bit because the struct
-			 * page is not visible yet so nobody should
-			 * access it yet.
-			 */
-			__SetPageReserved(page);
-		}
+		/*
+		 * no need for atomic set_bit because the struct
+		 * page is not visible yet so nobody should
+		 * access it yet.
+		 */
+		__SetPageReserved(page);
 	}
 }
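The fallback above relies on the classic for-plus-if macro idiom. A
self-contained userspace sketch (pfn_valid() stubbed out to mark even
PFNs valid; not kernel code) shows that break and continue in the loop
body behave exactly as in a plain for loop, because the if statement is
nested inside the for:

	#include <stdbool.h>
	#include <stdio.h>

	/* Stub: pretend only even PFNs are backed by a struct page. */
	static bool pfn_valid(unsigned long pfn)
	{
		return (pfn % 2) == 0;
	}

	/* Same shape as the fallback macro in the patch. */
	#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \
		for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \
			if (pfn_valid(_pfn))

	int main(void)
	{
		unsigned long pfn;

		for_each_valid_pfn(pfn, 0, 10) {
			if (pfn == 6)
				break;	/* exits the for loop, as expected */
			printf("valid pfn %lu\n", pfn);
		}
		/* prints: 0, 2, 4 */
		return 0;
	}

One general caveat of this idiom: an else written immediately after the
loop body would silently bind to the hidden if, so callers should not
follow the construct with an else clause.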