Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 85
1 file changed, 35 insertions, 50 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9fba8859ecd7..e0ff3a811ec5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@
 #include <linux/nmi.h>
 #include <linux/psi.h>
 #include <linux/padata.h>
+#include <linux/khugepaged.h>

 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -155,16 +156,16 @@ static int __init early_init_on_alloc(char *buf)
         int ret;
         bool bool_result;

-        if (!buf)
-                return -EINVAL;
         ret = kstrtobool(buf, &bool_result);
+        if (ret)
+                return ret;
         if (bool_result && page_poisoning_enabled())
                 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n");
         if (bool_result)
                 static_branch_enable(&init_on_alloc);
         else
                 static_branch_disable(&init_on_alloc);
-        return ret;
+        return 0;
 }
 early_param("init_on_alloc", early_init_on_alloc);

@@ -173,16 +174,16 @@ static int __init early_init_on_free(char *buf)
         int ret;
         bool bool_result;

-        if (!buf)
-                return -EINVAL;
         ret = kstrtobool(buf, &bool_result);
+        if (ret)
+                return ret;
         if (bool_result && page_poisoning_enabled())
                 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n");
         if (bool_result)
                 static_branch_enable(&init_on_free);
         else
                 static_branch_disable(&init_on_free);
-        return ret;
+        return 0;
 }
 early_param("init_on_free", early_init_on_free);

@@ -3740,8 +3741,8 @@ retry:
          */
         no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
         z = ac->preferred_zoneref;
-        for_next_zone_zonelist_nodemask(zone, z, ac->zonelist,
-                                        ac->highest_zoneidx, ac->nodemask) {
+        for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx,
+                                        ac->nodemask) {
                 struct page *page;
                 unsigned long mark;

@@ -3985,8 +3986,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
          * success so it is time to admit defeat. We will skip the OOM killer
          * because it is very likely that the caller has a more reasonable
          * fallback than shooting a random task.
+         *
+         * The OOM killer may not free memory on a specific node.
          */
-        if (gfp_mask & __GFP_RETRY_MAYFAIL)
+        if (gfp_mask & (__GFP_RETRY_MAYFAIL | __GFP_THISNODE))
                 goto out;
         /* The OOM killer does not needlessly kill tasks for lowmem */
         if (ac->highest_zoneidx < ZONE_NORMAL)
@@ -4003,10 +4006,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
          * failures more gracefully we should just bail out here.
          */

-        /* The OOM killer may not free memory on a specific node */
-        if (gfp_mask & __GFP_THISNODE)
-                goto out;
-
         /* Exhausted what can be done so it's blame time */
         if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
                 *did_some_progress = 1;
@@ -4254,13 +4253,12 @@ EXPORT_SYMBOL_GPL(fs_reclaim_release);
 #endif

 /* Perform direct synchronous page reclaim */
-static int
+static unsigned long
 __perform_reclaim(gfp_t gfp_mask, unsigned int order,
                                         const struct alloc_context *ac)
 {
-        int progress;
         unsigned int noreclaim_flag;
-        unsigned long pflags;
+        unsigned long pflags, progress;

         cond_resched();

@@ -4839,12 +4837,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,

         *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);

-        return true;
-}
-
-/* Determine whether to spread dirty pages and what the first usable zone */
-static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
-{
         /* Dirty zone balancing only done in the fast path */
         ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);

@@ -4855,6 +4847,8 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac)
          */
         ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->highest_zoneidx, ac->nodemask);
+
+        return true;
 }

 /*
@@ -4883,8 +4877,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
         if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
                 return NULL;

-        finalise_ac(gfp_mask, &ac);
-
         /*
          * Forbid the first pass from falling back to types that fragment
          * memory until all local zones are considered.
@@ -4960,6 +4952,9 @@ void __free_pages(struct page *page, unsigned int order)
 {
         if (put_page_testzero(page))
                 free_the_page(page, order);
+        else if (!PageHead(page))
+                while (order-- > 0)
+                        free_the_page(page + (1 << order), order);
 }
 EXPORT_SYMBOL(__free_pages);

@@ -5650,7 +5645,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
         int n, val;
         int min_val = INT_MAX;
         int best_node = NUMA_NO_NODE;
-        const struct cpumask *tmp = cpumask_of_node(0);

         /* Use the local node if we haven't already */
         if (!node_isset(node, *used_node_mask)) {
@@ -5671,8 +5665,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
                 val += (n < node);

                 /* Give preference to headless and unused nodes */
-                tmp = cpumask_of_node(n);
-                if (!cpumask_empty(tmp))
+                if (!cpumask_empty(cpumask_of_node(n)))
                         val += PENALTY_FOR_NODE_WITH_CPUS;

                 /* Slight preference for less loaded node */
@@ -5968,7 +5961,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)

         if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
                 if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
-                        for_each_memblock(memory, r) {
+                        for_each_mem_region(r) {
                                 if (*pfn < memblock_region_memory_end_pfn(r))
                                         break;
                         }
@@ -6553,7 +6546,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
                 unsigned long start_pfn, end_pfn;
                 struct memblock_region *r;

-                for_each_memblock(memory, r) {
+                for_each_mem_region(r) {
                         start_pfn = clamp(memblock_region_memory_base_pfn(r),
                                           zone_start_pfn, zone_end_pfn);
                         end_pfn = clamp(memblock_region_memory_end_pfn(r),
@@ -6997,8 +6990,7 @@ static void __init init_unavailable_mem(void)
          * Loop through unavailable ranges not covered by memblock.memory.
          */
         pgcnt = 0;
-        for_each_mem_range(i, &memblock.memory, NULL,
-                           NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
+        for_each_mem_range(i, &start, &end) {
                 if (next < start)
                         pgcnt += init_unavailable_range(PFN_DOWN(next),
                                                         PFN_UP(start));
@@ -7148,7 +7140,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
          * options.
          */
         if (movable_node_is_enabled()) {
-                for_each_memblock(memory, r) {
+                for_each_mem_region(r) {
                         if (!memblock_is_hotpluggable(r))
                                 continue;

@@ -7169,7 +7161,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
         if (mirrored_kernelcore) {
                 bool mem_below_4gb_not_mirrored = false;

-                for_each_memblock(memory, r) {
+                for_each_mem_region(r) {
                         if (memblock_is_mirror(r))
                                 continue;

@@ -7904,6 +7896,8 @@ int __meminit init_per_zone_wmark_min(void)
         setup_min_slab_ratio();
 #endif

+        khugepaged_min_free_kbytes_update();
+
         return 0;
 }
 postcore_initcall(init_per_zone_wmark_min)
@@ -8231,14 +8225,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
 {
         unsigned long iter = 0;
         unsigned long pfn = page_to_pfn(page);
-
-        /*
-         * TODO we could make this much more efficient by not checking every
-         * page in the range if we know all of them are in MOVABLE_ZONE and
-         * that the movable zone guarantees that pages are migratable but
-         * the later is not the case right now unfortunatelly. E.g. movablecore
-         * can still lead to having bootmem allocations in zone_movable.
-         */
+        unsigned long offset = pfn % pageblock_nr_pages;

         if (is_migrate_cma_page(page)) {
                 /*
@@ -8252,12 +8239,18 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
                 return page;
         }

-        for (; iter < pageblock_nr_pages; iter++) {
+        for (; iter < pageblock_nr_pages - offset; iter++) {
                 if (!pfn_valid_within(pfn + iter))
                         continue;

                 page = pfn_to_page(pfn + iter);

+                /*
+                 * Both, bootmem allocations and memory holes are marked
+                 * PG_reserved and are unmovable. We can even have unmovable
+                 * allocations inside ZONE_MOVABLE, for example when
+                 * specifying "movablecore".
+                 */
                 if (PageReserved(page))
                         return page;

@@ -8331,14 +8324,6 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page,
                  * it. But now, memory offline itself doesn't call
                  * shrink_node_slabs() and it still to be fixed.
                  */
-                /*
-                 * If the page is not RAM, page_count()should be 0.
-                 * we don't need more check. This is an _used_ not-movable page.
-                 *
-                 * The problematic thing here is PG_reserved pages. PG_reserved
-                 * is set to both of a memory hole page and a _used_ kernel
-                 * page at boot.
-                 */
                 return page;
         }
         return NULL;