summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-06 14:08:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-06 14:08:17 -0700
commit512b7931ad0561ffe14265f9ff554a3c081b476b (patch)
treea94450d08468e094d2d92a495de4650faab09c1f /mm/page_alloc.c
parentfe91c4725aeed35023ba4f7a1e1adfebb6878c23 (diff)
parent658f9ae761b5965893727dd4edcdad56e5a439bb (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "257 patches. Subsystems affected by this patch series: scripts, ocfs2, vfs, and mm (slab-generic, slab, slub, kconfig, dax, kasan, debug, pagecache, gup, swap, memcg, pagemap, mprotect, mremap, iomap, tracing, vmalloc, pagealloc, memory-failure, hugetlb, userfaultfd, vmscan, tools, memblock, oom-kill, hugetlbfs, migration, thp, readahead, nommu, ksm, vmstat, madvise, memory-hotplug, rmap, zsmalloc, highmem, zram, cleanups, kfence, and damon)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (257 commits) mm/damon: remove return value from before_terminate callback mm/damon: fix a few spelling mistakes in comments and a pr_debug message mm/damon: simplify stop mechanism Docs/admin-guide/mm/pagemap: wordsmith page flags descriptions Docs/admin-guide/mm/damon/start: simplify the content Docs/admin-guide/mm/damon/start: fix a wrong link Docs/admin-guide/mm/damon/start: fix wrong example commands mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on mm/damon: remove unnecessary variable initialization Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM) selftests/damon: support watermarks mm/damon/dbgfs: support watermarks mm/damon/schemes: activate schemes based on a watermarks mechanism tools/selftests/damon: update for regions prioritization of schemes mm/damon/dbgfs: support prioritization weights mm/damon/vaddr,paddr: support pageout prioritization mm/damon/schemes: prioritize regions within the quotas mm/damon/selftests: support schemes quotas mm/damon/dbgfs: support quotas of schemes ...
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c119
1 files changed, 59 insertions, 60 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fee18ada46a2..c5952749ad40 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -677,10 +677,8 @@ static inline int pindex_to_order(unsigned int pindex)
int order = pindex / MIGRATE_PCPTYPES;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (order > PAGE_ALLOC_COSTLY_ORDER) {
+ if (order > PAGE_ALLOC_COSTLY_ORDER)
order = pageblock_order;
- VM_BUG_ON(order != pageblock_order);
- }
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
#endif
@@ -1430,14 +1428,8 @@ static inline void prefetch_buddy(struct page *page)
/*
* Frees a number of pages from the PCP lists
- * Assumes all pages on list are in same zone, and of same order.
+ * Assumes all pages on list are in same zone.
* count is the number of pages to free.
- *
- * If the zone was previously in an "all pages pinned" state then look to
- * see if this freeing clears that state.
- *
- * And clear the zone's pages_scanned counter, to hold off the "all pages are
- * pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp)
@@ -1591,7 +1583,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
struct zone *zone = &pgdat->node_zones[zid];
- if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+ if (zone_spans_pfn(zone, pfn))
break;
}
__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
@@ -3149,9 +3141,9 @@ static void drain_local_pages_wq(struct work_struct *work)
* cpu which is alright but we also have to make sure to not move to
* a different one.
*/
- preempt_disable();
+ migrate_disable();
drain_local_pages(drain->zone);
- preempt_enable();
+ migrate_enable();
}
/*
@@ -3968,6 +3960,8 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
}
#ifdef CONFIG_NUMA
+int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
+
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
{
return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
@@ -4797,30 +4791,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
trace_reclaim_retry_zone(z, order, reclaimable,
available, min_wmark, *no_progress_loops, wmark);
if (wmark) {
- /*
- * If we didn't make any progress and have a lot of
- * dirty + writeback pages then we should wait for
- * an IO to complete to slow down the reclaim and
- * prevent from pre mature OOM
- */
- if (!did_some_progress) {
- unsigned long write_pending;
-
- write_pending = zone_page_state_snapshot(zone,
- NR_ZONE_WRITE_PENDING);
-
- if (2 * write_pending > reclaimable) {
- congestion_wait(BLK_RW_ASYNC, HZ/10);
- return true;
- }
- }
-
ret = true;
- goto out;
+ break;
}
}
-out:
/*
* Memory allocation/reclaim might be called from a WQ context and the
* current implementation of the WQ concurrency control doesn't
@@ -4916,6 +4891,19 @@ retry_cpuset:
if (!ac->preferred_zoneref->zone)
goto nopage;
+ /*
+ * Check for insane configurations where the cpuset doesn't contain
+ * any suitable zone to satisfy the request - e.g. non-movable
+ * GFP_HIGHUSER allocations from MOVABLE nodes only.
+ */
+ if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) {
+ struct zoneref *z = first_zones_zonelist(ac->zonelist,
+ ac->highest_zoneidx,
+ &cpuset_current_mems_allowed);
+ if (!z->zone)
+ goto nopage;
+ }
+
if (alloc_flags & ALLOC_KSWAPD)
wake_all_kswapds(order, gfp_mask, ac);
@@ -5630,8 +5618,8 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
unsigned int order = get_order(size);
unsigned long addr;
- if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
- gfp_mask &= ~__GFP_COMP;
+ if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
+ gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
addr = __get_free_pages(gfp_mask, order);
return make_alloc_exact(addr, order, size);
@@ -5655,8 +5643,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
unsigned int order = get_order(size);
struct page *p;
- if (WARN_ON_ONCE(gfp_mask & __GFP_COMP))
- gfp_mask &= ~__GFP_COMP;
+ if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
+ gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
p = alloc_pages_node(nid, gfp_mask, order);
if (!p)
@@ -5998,6 +5986,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
printk(KERN_CONT
"%s"
" free:%lukB"
+ " boost:%lukB"
" min:%lukB"
" low:%lukB"
" high:%lukB"
@@ -6018,6 +6007,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
"\n",
zone->name,
K(zone_page_state(zone, NR_FREE_PAGES)),
+ K(zone->watermark_boost),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
@@ -6273,7 +6263,7 @@ static void build_zonelists(pg_data_t *pgdat)
*/
if (node_distance(local_node, node) !=
node_distance(local_node, prev_node))
- node_load[node] = load;
+ node_load[node] += load;
node_order[nr_nodes++] = node;
prev_node = node;
@@ -6282,6 +6272,10 @@ static void build_zonelists(pg_data_t *pgdat)
build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
build_thisnode_zonelists(pgdat);
+ pr_info("Fallback order for Node %d: ", local_node);
+ for (node = 0; node < nr_nodes; node++)
+ pr_cont("%d ", node_order[node]);
+ pr_cont("\n");
}
#ifdef CONFIG_HAVE_MEMORYLESS_NODES
@@ -7407,6 +7401,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
{
+ int i;
+
pgdat_resize_init(pgdat);
pgdat_init_split_queue(pgdat);
@@ -7415,6 +7411,9 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
+ for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
+ init_waitqueue_head(&pgdat->reclaim_wait[i]);
+
pgdat_page_ext_init(pgdat);
lruvec_init(&pgdat->__lruvec);
}
@@ -8144,8 +8143,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
}
if (pages && s)
- pr_info("Freeing %s memory: %ldK\n",
- s, pages << (PAGE_SHIFT - 10));
+ pr_info("Freeing %s memory: %ldK\n", s, K(pages));
return pages;
}
@@ -8190,14 +8188,13 @@ void __init mem_init_print_info(void)
", %luK highmem"
#endif
")\n",
- nr_free_pages() << (PAGE_SHIFT - 10),
- physpages << (PAGE_SHIFT - 10),
+ K(nr_free_pages()), K(physpages),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
- (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
- totalcma_pages << (PAGE_SHIFT - 10)
+ K(physpages - totalram_pages() - totalcma_pages),
+ K(totalcma_pages)
#ifdef CONFIG_HIGHMEM
- , totalhigh_pages() << (PAGE_SHIFT - 10)
+ , K(totalhigh_pages())
#endif
);
}
@@ -8470,7 +8467,7 @@ void setup_per_zone_wmarks(void)
* 8192MB: 11584k
* 16384MB: 16384k
*/
-int __meminit init_per_zone_wmark_min(void)
+void calculate_min_free_kbytes(void)
{
unsigned long lowmem_kbytes;
int new_min_free_kbytes;
@@ -8478,16 +8475,17 @@ int __meminit init_per_zone_wmark_min(void)
lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
- if (new_min_free_kbytes > user_min_free_kbytes) {
- min_free_kbytes = new_min_free_kbytes;
- if (min_free_kbytes < 128)
- min_free_kbytes = 128;
- if (min_free_kbytes > 262144)
- min_free_kbytes = 262144;
- } else {
+ if (new_min_free_kbytes > user_min_free_kbytes)
+ min_free_kbytes = clamp(new_min_free_kbytes, 128, 262144);
+ else
pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
new_min_free_kbytes, user_min_free_kbytes);
- }
+
+}
+
+int __meminit init_per_zone_wmark_min(void)
+{
+ calculate_min_free_kbytes();
setup_per_zone_wmarks();
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
@@ -8774,7 +8772,8 @@ void *__init alloc_large_system_hash(const char *tablename,
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
- huge = is_vm_area_hugepages(table);
+ if (table)
+ huge = is_vm_area_hugepages(table);
} else {
/*
* If bucketsize is not a power-of-two, we may free
@@ -9371,21 +9370,21 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
}
#endif
+/*
+ * This function returns a stable result only if called under zone lock.
+ */
bool is_free_buddy_page(struct page *page)
{
- struct zone *zone = page_zone(page);
unsigned long pfn = page_to_pfn(page);
- unsigned long flags;
unsigned int order;
- spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
struct page *page_head = page - (pfn & ((1 << order) - 1));
- if (PageBuddy(page_head) && buddy_order(page_head) >= order)
+ if (PageBuddy(page_head) &&
+ buddy_order_unsafe(page_head) >= order)
break;
}
- spin_unlock_irqrestore(&zone->lock, flags);
return order < MAX_ORDER;
}