summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c459
1 files changed, 335 insertions, 124 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e2f19bf948db..aaa1655cf682 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -72,7 +72,6 @@
#include <linux/padata.h>
#include <linux/khugepaged.h>
#include <linux/buffer_head.h>
-
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -108,6 +107,17 @@ typedef int __bitwise fpi_t;
*/
#define FPI_TO_TAIL ((__force fpi_t)BIT(1))
+/*
+ * Don't poison memory with KASAN (only for the tag-based modes).
+ * During boot, all non-reserved memblock memory is exposed to page_alloc.
+ * Poisoning all that memory lengthens boot time, especially on systems with
+ * large amount of RAM. This flag is used to skip that poisoning.
+ * This is only done for the tag-based KASAN modes, as those are able to
+ * detect memory corruptions with the memory tags assigned by default.
+ * All memory allocated normally after boot gets poisoned as usual.
+ */
+#define FPI_SKIP_KASAN_POISON ((__force fpi_t)BIT(2))
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
@@ -384,10 +394,15 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
* on-demand allocation and then freed again before the deferred pages
* initialization is done, but this is not likely to happen.
*/
-static inline void kasan_free_nondeferred_pages(struct page *page, int order)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+ bool init, fpi_t fpi_flags)
{
- if (!static_branch_unlikely(&deferred_pages))
- kasan_free_pages(page, order);
+ if (static_branch_unlikely(&deferred_pages))
+ return;
+ if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+ (fpi_flags & FPI_SKIP_KASAN_POISON))
+ return;
+ kasan_free_pages(page, order, init);
}
/* Returns true if the struct page for the pfn is uninitialised */
@@ -438,7 +453,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
return false;
}
#else
-#define kasan_free_nondeferred_pages(p, o) kasan_free_pages(p, o)
+static inline void kasan_free_nondeferred_pages(struct page *page, int order,
+ bool init, fpi_t fpi_flags)
+{
+ if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+ (fpi_flags & FPI_SKIP_KASAN_POISON))
+ return;
+ kasan_free_pages(page, order, init);
+}
static inline bool early_page_uninitialised(unsigned long pfn)
{
@@ -764,32 +786,36 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
*/
void init_mem_debugging_and_hardening(void)
{
+ bool page_poisoning_requested = false;
+
+#ifdef CONFIG_PAGE_POISONING
+ /*
+ * Page poisoning is debug page alloc for some arches. If
+ * either of those options are enabled, enable poisoning.
+ */
+ if (page_poisoning_enabled() ||
+ (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
+ debug_pagealloc_enabled())) {
+ static_branch_enable(&_page_poisoning_enabled);
+ page_poisoning_requested = true;
+ }
+#endif
+
if (_init_on_alloc_enabled_early) {
- if (page_poisoning_enabled())
+ if (page_poisoning_requested)
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
"will take precedence over init_on_alloc\n");
else
static_branch_enable(&init_on_alloc);
}
if (_init_on_free_enabled_early) {
- if (page_poisoning_enabled())
+ if (page_poisoning_requested)
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
"will take precedence over init_on_free\n");
else
static_branch_enable(&init_on_free);
}
-#ifdef CONFIG_PAGE_POISONING
- /*
- * Page poisoning is debug page alloc for some arches. If
- * either of those options are enabled, enable poisoning.
- */
- if (page_poisoning_enabled() ||
- (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
- debug_pagealloc_enabled()))
- static_branch_enable(&_page_poisoning_enabled);
-#endif
-
#ifdef CONFIG_DEBUG_PAGEALLOC
if (!debug_pagealloc_enabled())
return;
@@ -867,7 +893,7 @@ compaction_capture(struct capture_control *capc, struct page *page,
return false;
/*
- * Do not let lower order allocations polluate a movable pageblock.
+ * Do not let lower order allocations pollute a movable pageblock.
* This might let an unmovable request use a reclaimable pageblock
* and vice-versa but no more than normal fallback logic which can
* have trouble finding a high-order free page.
@@ -1103,7 +1129,7 @@ static inline bool page_expected_state(struct page *page,
if (unlikely((unsigned long)page->mapping |
page_ref_count(page) |
#ifdef CONFIG_MEMCG
- (unsigned long)page_memcg(page) |
+ page->memcg_data |
#endif
(page->flags & check_flags)))
return false;
@@ -1128,7 +1154,7 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
}
#ifdef CONFIG_MEMCG
- if (unlikely(page_memcg(page)))
+ if (unlikely(page->memcg_data))
bad_reason = "page still charged to cgroup";
#endif
return bad_reason;
@@ -1216,9 +1242,10 @@ static void kernel_init_free_pages(struct page *page, int numpages)
}
static __always_inline bool free_pages_prepare(struct page *page,
- unsigned int order, bool check_free)
+ unsigned int order, bool check_free, fpi_t fpi_flags)
{
int bad = 0;
+ bool init;
VM_BUG_ON_PAGE(PageTail(page), page);
@@ -1276,16 +1303,21 @@ static __always_inline bool free_pages_prepare(struct page *page,
debug_check_no_obj_freed(page_address(page),
PAGE_SIZE << order);
}
- if (want_init_on_free())
- kernel_init_free_pages(page, 1 << order);
kernel_poison_pages(page, 1 << order);
/*
+ * As memory initialization might be integrated into KASAN,
+ * kasan_free_pages and kernel_init_free_pages must be
+ * kept together to avoid discrepancies in behavior.
+ *
* With hardware tag-based KASAN, memory tags must be set before the
* page becomes unavailable via debug_pagealloc or arch_free_page.
*/
- kasan_free_nondeferred_pages(page, order);
+ init = want_init_on_free();
+ if (init && !kasan_has_integrated_init())
+ kernel_init_free_pages(page, 1 << order);
+ kasan_free_nondeferred_pages(page, order, init, fpi_flags);
/*
* arch_free_page() can make the page's contents inaccessible. s390
@@ -1307,7 +1339,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
*/
static bool free_pcp_prepare(struct page *page)
{
- return free_pages_prepare(page, 0, true);
+ return free_pages_prepare(page, 0, true, FPI_NONE);
}
static bool bulkfree_pcp_prepare(struct page *page)
@@ -1327,9 +1359,9 @@ static bool bulkfree_pcp_prepare(struct page *page)
static bool free_pcp_prepare(struct page *page)
{
if (debug_pagealloc_enabled_static())
- return free_pages_prepare(page, 0, true);
+ return free_pages_prepare(page, 0, true, FPI_NONE);
else
- return free_pages_prepare(page, 0, false);
+ return free_pages_prepare(page, 0, false, FPI_NONE);
}
static bool bulkfree_pcp_prepare(struct page *page)
@@ -1537,7 +1569,7 @@ static void __free_pages_ok(struct page *page, unsigned int order,
int migratetype;
unsigned long pfn = page_to_pfn(page);
- if (!free_pages_prepare(page, order, true))
+ if (!free_pages_prepare(page, order, true, fpi_flags))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
@@ -1574,7 +1606,7 @@ void __free_pages_core(struct page *page, unsigned int order)
* Bypass PCP and place fresh pages right to the tail, primarily
* relevant for memory onlining.
*/
- __free_pages_ok(page, order, FPI_TO_TAIL);
+ __free_pages_ok(page, order, FPI_TO_TAIL | FPI_SKIP_KASAN_POISON);
}
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2292,17 +2324,32 @@ static bool check_new_pages(struct page *page, unsigned int order)
inline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
+ bool init;
+
set_page_private(page, 0);
set_page_refcounted(page);
arch_alloc_page(page, order);
debug_pagealloc_map_pages(page, 1 << order);
- kasan_alloc_pages(page, order);
+
+ /*
+ * Page unpoisoning must happen before memory initialization.
+ * Otherwise, the poison pattern will be overwritten for __GFP_ZERO
+ * allocations and the page unpoisoning code will complain.
+ */
kernel_unpoison_pages(page, 1 << order);
- set_page_owner(page, order, gfp_flags);
- if (!want_init_on_free() && want_init_on_alloc(gfp_flags))
+ /*
+ * As memory initialization might be integrated into KASAN,
+ * kasan_alloc_pages and kernel_init_free_pages must be
+ * kept together to avoid discrepancies in behavior.
+ */
+ init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
+ kasan_alloc_pages(page, order, init);
+ if (init && !kasan_has_integrated_init())
kernel_init_free_pages(page, 1 << order);
+
+ set_page_owner(page, order, gfp_flags);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -2386,19 +2433,21 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
* boundary. If alignment is required, use move_freepages_block()
*/
static int move_freepages(struct zone *zone,
- struct page *start_page, struct page *end_page,
+ unsigned long start_pfn, unsigned long end_pfn,
int migratetype, int *num_movable)
{
struct page *page;
+ unsigned long pfn;
unsigned int order;
int pages_moved = 0;
- for (page = start_page; page <= end_page;) {
- if (!pfn_valid_within(page_to_pfn(page))) {
- page++;
+ for (pfn = start_pfn; pfn <= end_pfn;) {
+ if (!pfn_valid_within(pfn)) {
+ pfn++;
continue;
}
+ page = pfn_to_page(pfn);
if (!PageBuddy(page)) {
/*
* We assume that pages that could be isolated for
@@ -2408,8 +2457,7 @@ static int move_freepages(struct zone *zone,
if (num_movable &&
(PageLRU(page) || __PageMovable(page)))
(*num_movable)++;
-
- page++;
+ pfn++;
continue;
}
@@ -2419,7 +2467,7 @@ static int move_freepages(struct zone *zone,
order = buddy_order(page);
move_to_free_list(page, zone, order, migratetype);
- page += 1 << order;
+ pfn += 1 << order;
pages_moved += 1 << order;
}
@@ -2429,25 +2477,22 @@ static int move_freepages(struct zone *zone,
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype, int *num_movable)
{
- unsigned long start_pfn, end_pfn;
- struct page *start_page, *end_page;
+ unsigned long start_pfn, end_pfn, pfn;
if (num_movable)
*num_movable = 0;
- start_pfn = page_to_pfn(page);
- start_pfn = start_pfn & ~(pageblock_nr_pages-1);
- start_page = pfn_to_page(start_pfn);
- end_page = start_page + pageblock_nr_pages - 1;
+ pfn = page_to_pfn(page);
+ start_pfn = pfn & ~(pageblock_nr_pages - 1);
end_pfn = start_pfn + pageblock_nr_pages - 1;
/* Do not cross zone boundaries */
if (!zone_spans_pfn(zone, start_pfn))
- start_page = page;
+ start_pfn = pfn;
if (!zone_spans_pfn(zone, end_pfn))
return 0;
- return move_freepages(zone, start_page, end_page, migratetype,
+ return move_freepages(zone, start_pfn, end_pfn, migratetype,
num_movable);
}
@@ -2731,7 +2776,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
/*
* In page freeing path, migratetype change is racy so
* we can counter several free pages in a pageblock
- * in this loop althoug we changed the pageblock type
+ * in this loop although we changed the pageblock type
* from highatomic to ac->migratetype. So we should
* adjust the count once.
*/
@@ -2908,7 +2953,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
- int i, alloced = 0;
+ int i, allocated = 0;
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
@@ -2931,7 +2976,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages are ordered properly.
*/
list_add_tail(&page->lru, list);
- alloced++;
+ allocated++;
if (is_migrate_cma(get_pcppage_migratetype(page)))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
-(1 << order));
@@ -2940,12 +2985,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
/*
* i pages were removed from the buddy list even if some leak due
* to check_pcp_refill failing so adjust NR_FREE_PAGES based
- * on i. Do not confuse with 'alloced' which is the number of
+ * on i. Do not confuse with 'allocated' which is the number of
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
spin_unlock(&zone->lock);
- return alloced;
+ return allocated;
}
#ifdef CONFIG_NUMA
@@ -3035,7 +3080,7 @@ static void drain_local_pages_wq(struct work_struct *work)
* drain_all_pages doesn't use proper cpu hotplug protection so
* we can race with cpu offline when the WQ can move this from
* a cpu pinned worker to an unbound one. We can operate on a different
- * cpu which is allright but we also have to make sure to not move to
+ * cpu which is alright but we also have to make sure to not move to
* a different one.
*/
preempt_disable();
@@ -3415,7 +3460,8 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
}
/* Remove page from the per-cpu list, caller must protect the list */
-static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+static inline
+struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
unsigned int alloc_flags,
struct per_cpu_pages *pcp,
struct list_head *list)
@@ -3813,16 +3859,13 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
return alloc_flags;
}
-static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
- unsigned int alloc_flags)
+/* Must be called after current_gfp_context() which can change gfp_mask */
+static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
+ unsigned int alloc_flags)
{
#ifdef CONFIG_CMA
- unsigned int pflags = current->flags;
-
- if (!(pflags & PF_MEMALLOC_NOCMA) &&
- gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
-
#endif
return alloc_flags;
}
@@ -3922,7 +3965,7 @@ retry:
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;
- if (node_reclaim_mode == 0 ||
+ if (!node_reclaim_enabled() ||
!zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
continue;
@@ -4130,7 +4173,7 @@ out:
}
/*
- * Maximum number of compaction retries wit a progress before OOM
+ * Maximum number of compaction retries with a progress before OOM
* killer is consider as the only way to move forward.
*/
#define MAX_COMPACT_RETRIES 16
@@ -4158,6 +4201,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
+ if (*compact_result == COMPACT_SKIPPED)
+ return NULL;
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
* count a compaction stall
@@ -4478,7 +4523,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
} else if (unlikely(rt_task(current)) && !in_interrupt())
alloc_flags |= ALLOC_HARDER;
- alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
return alloc_flags;
}
@@ -4780,7 +4825,7 @@ retry:
reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
if (reserve_flags)
- alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
/*
* Reset the nodemask and zonelist iterators if memory policies can be
@@ -4921,7 +4966,7 @@ got_pg:
static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
int preferred_nid, nodemask_t *nodemask,
- struct alloc_context *ac, gfp_t *alloc_mask,
+ struct alloc_context *ac, gfp_t *alloc_gfp,
unsigned int *alloc_flags)
{
ac->highest_zoneidx = gfp_zone(gfp_mask);
@@ -4930,7 +4975,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
ac->migratetype = gfp_migratetype(gfp_mask);
if (cpusets_enabled()) {
- *alloc_mask |= __GFP_HARDWALL;
+ *alloc_gfp |= __GFP_HARDWALL;
/*
* When we are in the interrupt context, it is irrelevant
* to the current task context. It means that any node ok.
@@ -4949,7 +4994,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
if (should_fail_alloc_page(gfp_mask, order))
return false;
- *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);
+ *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags);
/* Dirty zone balancing only done in the fast path */
ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
@@ -4966,15 +5011,160 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
}
/*
+ * __alloc_pages_bulk - Allocate a number of order-0 pages to a list or array
+ * @gfp: GFP flags for the allocation
+ * @preferred_nid: The preferred NUMA node ID to allocate from
+ * @nodemask: Set of nodes to allocate from, may be NULL
+ * @nr_pages: The number of pages desired on the list or array
+ * @page_list: Optional list to store the allocated pages
+ * @page_array: Optional array to store the pages
+ *
+ * This is a batched version of the page allocator that attempts to
+ * allocate nr_pages quickly. Pages are added to page_list if page_list
+ * is not NULL, otherwise it is assumed that the page_array is valid.
+ *
+ * For lists, nr_pages is the number of pages that should be allocated.
+ *
+ * For arrays, only NULL elements are populated with pages and nr_pages
+ * is the maximum number of pages that will be stored in the array.
+ *
+ * Returns the number of pages on the list or array.
+ */
+unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+ nodemask_t *nodemask, int nr_pages,
+ struct list_head *page_list,
+ struct page **page_array)
+{
+ struct page *page;
+ unsigned long flags;
+ struct zone *zone;
+ struct zoneref *z;
+ struct per_cpu_pages *pcp;
+ struct list_head *pcp_list;
+ struct alloc_context ac;
+ gfp_t alloc_gfp;
+ unsigned int alloc_flags = ALLOC_WMARK_LOW;
+ int nr_populated = 0;
+
+ if (unlikely(nr_pages <= 0))
+ return 0;
+
+ /*
+ * Skip populated array elements to determine if any pages need
+ * to be allocated before disabling IRQs.
+ */
+ while (page_array && page_array[nr_populated] && nr_populated < nr_pages)
+ nr_populated++;
+
+ /* Use the single page allocator for one page. */
+ if (nr_pages - nr_populated == 1)
+ goto failed;
+
+ /* May set ALLOC_NOFRAGMENT, fragmentation will return 1 page. */
+ gfp &= gfp_allowed_mask;
+ alloc_gfp = gfp;
+ if (!prepare_alloc_pages(gfp, 0, preferred_nid, nodemask, &ac, &alloc_gfp, &alloc_flags))
+ return 0;
+ gfp = alloc_gfp;
+
+ /* Find an allowed local zone that meets the low watermark. */
+ for_each_zone_zonelist_nodemask(zone, z, ac.zonelist, ac.highest_zoneidx, ac.nodemask) {
+ unsigned long mark;
+
+ if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) &&
+ !__cpuset_zone_allowed(zone, gfp)) {
+ continue;
+ }
+
+ if (nr_online_nodes > 1 && zone != ac.preferred_zoneref->zone &&
+ zone_to_nid(zone) != zone_to_nid(ac.preferred_zoneref->zone)) {
+ goto failed;
+ }
+
+ mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
+ if (zone_watermark_fast(zone, 0, mark,
+ zonelist_zone_idx(ac.preferred_zoneref),
+ alloc_flags, gfp)) {
+ break;
+ }
+ }
+
+ /*
+ * If there are no allowed local zones that meets the watermarks then
+ * try to allocate a single page and reclaim if necessary.
+ */
+ if (unlikely(!zone))
+ goto failed;
+
+ /* Attempt the batch allocation */
+ local_irq_save(flags);
+ pcp = &this_cpu_ptr(zone->pageset)->pcp;
+ pcp_list = &pcp->lists[ac.migratetype];
+
+ while (nr_populated < nr_pages) {
+
+ /* Skip existing pages */
+ if (page_array && page_array[nr_populated]) {
+ nr_populated++;
+ continue;
+ }
+
+ page = __rmqueue_pcplist(zone, ac.migratetype, alloc_flags,
+ pcp, pcp_list);
+ if (unlikely(!page)) {
+ /* Try and get at least one page */
+ if (!nr_populated)
+ goto failed_irq;
+ break;
+ }
+
+ /*
+ * Ideally this would be batched but the best way to do
+ * that cheaply is to first convert zone_statistics to
+ * be inaccurate per-cpu counter like vm_events to avoid
+ * a RMW cycle then do the accounting with IRQs enabled.
+ */
+ __count_zid_vm_events(PGALLOC, zone_idx(zone), 1);
+ zone_statistics(ac.preferred_zoneref->zone, zone);
+
+ prep_new_page(page, 0, gfp, 0);
+ if (page_list)
+ list_add(&page->lru, page_list);
+ else
+ page_array[nr_populated] = page;
+ nr_populated++;
+ }
+
+ local_irq_restore(flags);
+
+ return nr_populated;
+
+failed_irq:
+ local_irq_restore(flags);
+
+failed:
+ page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
+ if (page) {
+ if (page_list)
+ list_add(&page->lru, page_list);
+ else
+ page_array[nr_populated] = page;
+ nr_populated++;
+ }
+
+ return nr_populated;
+}
+EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
+
+/*
* This is the 'heart' of the zoned buddy allocator.
*/
-struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
+struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask)
{
struct page *page;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
- gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
+ gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
/*
@@ -4982,33 +5172,36 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
* so bail out early if the request is out of bound.
*/
if (unlikely(order >= MAX_ORDER)) {
- WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
+ WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
return NULL;
}
- gfp_mask &= gfp_allowed_mask;
- alloc_mask = gfp_mask;
- if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
+ gfp &= gfp_allowed_mask;
+ /*
+ * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+ * resp. GFP_NOIO which has to be inherited for all allocation requests
+ * from a particular context which has been marked by
+ * memalloc_no{fs,io}_{save,restore}. And PF_MEMALLOC_PIN which ensures
+ * movable zones are not used during allocation.
+ */
+ gfp = current_gfp_context(gfp);
+ alloc_gfp = gfp;
+ if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
+ &alloc_gfp, &alloc_flags))
return NULL;
/*
* Forbid the first pass from falling back to types that fragment
* memory until all local zones are considered.
*/
- alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
+ alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp);
/* First allocation attempt */
- page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
+ page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
if (likely(page))
goto out;
- /*
- * Apply scoped allocation constraints. This is mainly about GFP_NOFS
- * resp. GFP_NOIO which has to be inherited for all allocation requests
- * from a particular context which has been marked by
- * memalloc_no{fs,io}_{save,restore}.
- */
- alloc_mask = current_gfp_context(gfp_mask);
+ alloc_gfp = gfp;
ac.spread_dirty_pages = false;
/*
@@ -5017,20 +5210,20 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
*/
ac.nodemask = nodemask;
- page = __alloc_pages_slowpath(alloc_mask, order, &ac);
+ page = __alloc_pages_slowpath(alloc_gfp, order, &ac);
out:
- if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
- unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) {
+ if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT) && page &&
+ unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
__free_pages(page, order);
page = NULL;
}
- trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype);
+ trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
return page;
}
-EXPORT_SYMBOL(__alloc_pages_nodemask);
+EXPORT_SYMBOL(__alloc_pages);
/*
* Common helper functions. Never use with __GFP_HIGHMEM because the returned
@@ -5736,7 +5929,7 @@ static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
static int __parse_numa_zonelist_order(char *s)
{
/*
- * We used to support different zonlists modes but they turned
+ * We used to support different zonelists modes but they turned
* out to be just not useful. Let's keep the warning in place
* if somebody still use the cmd line parameter so that we do
* not fail it silently
@@ -7477,7 +7670,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
}
/*
- * Some architecturs, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
+ * Some architectures, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
* such cases we allow max_zone_pfn sorted in the descending order
*/
bool __weak arch_has_descending_max_zone_pfns(void)
@@ -7689,7 +7882,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
return pages;
}
-void __init mem_init_print_info(const char *str)
+void __init mem_init_print_info(void)
{
unsigned long physpages, codesize, datasize, rosize, bss_size;
unsigned long init_code_size, init_data_size;
@@ -7728,17 +7921,17 @@ void __init mem_init_print_info(const char *str)
#ifdef CONFIG_HIGHMEM
", %luK highmem"
#endif
- "%s%s)\n",
+ ")\n",
nr_free_pages() << (PAGE_SHIFT - 10),
physpages << (PAGE_SHIFT - 10),
codesize >> 10, datasize >> 10, rosize >> 10,
(init_data_size + init_code_size) >> 10, bss_size >> 10,
(physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10),
- totalcma_pages << (PAGE_SHIFT - 10),
+ totalcma_pages << (PAGE_SHIFT - 10)
#ifdef CONFIG_HIGHMEM
- totalhigh_pages() << (PAGE_SHIFT - 10),
+ , totalhigh_pages() << (PAGE_SHIFT - 10)
#endif
- str ? ", " : "", str ? str : "");
+ );
}
/**
@@ -8222,6 +8415,7 @@ void *__init alloc_large_system_hash(const char *tablename,
void *table = NULL;
gfp_t gfp_flags;
bool virt;
+ bool huge;
/* allow the kernel cmdline to have a say */
if (!numentries) {
@@ -8289,6 +8483,7 @@ void *__init alloc_large_system_hash(const char *tablename,
} else if (get_order(size) >= MAX_ORDER || hashdist) {
table = __vmalloc(size, gfp_flags);
virt = true;
+ huge = is_vm_area_hugepages(table);
} else {
/*
* If bucketsize is not a power-of-two, we may free
@@ -8305,7 +8500,7 @@ void *__init alloc_large_system_hash(const char *tablename,
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
- virt ? "vmalloc" : "linear");
+ virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
*_hash_shift = log2qty;
@@ -8450,6 +8645,27 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
pageblock_nr_pages));
}
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+/* Usage: See admin-guide/dynamic-debug-howto.rst */
+static void alloc_contig_dump_pages(struct list_head *page_list)
+{
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, "migrate failure");
+
+ if (DYNAMIC_DEBUG_BRANCH(descriptor)) {
+ struct page *page;
+
+ dump_stack();
+ list_for_each_entry(page, page_list, lru)
+ dump_page(page, "migration failure");
+ }
+}
+#else
+static inline void alloc_contig_dump_pages(struct list_head *page_list)
+{
+}
+#endif
+
/* [start, end) must belong to a single zone. */
static int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long start, unsigned long end)
@@ -8464,7 +8680,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
- migrate_prep();
+ lru_cache_disable();
while (pfn < end || !list_empty(&cc->migratepages)) {
if (fatal_signal_pending(current)) {
@@ -8474,14 +8690,13 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
if (list_empty(&cc->migratepages)) {
cc->nr_migratepages = 0;
- pfn = isolate_migratepages_range(cc, pfn, end);
- if (!pfn) {
- ret = -EINTR;
+ ret = isolate_migratepages_range(cc, pfn, end);
+ if (ret && ret != -EAGAIN)
break;
- }
+ pfn = cc->migrate_pfn;
tries = 0;
} else if (++tries == 5) {
- ret = ret < 0 ? ret : -EBUSY;
+ ret = -EBUSY;
break;
}
@@ -8491,8 +8706,18 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
ret = migrate_pages(&cc->migratepages, alloc_migration_target,
NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
+
+ /*
+ * On -ENOMEM, migrate_pages() bails out right away. It is pointless
+ * to retry again over this error, so do the same here.
+ */
+ if (ret == -ENOMEM)
+ break;
}
+
+ lru_cache_enable();
if (ret < 0) {
+ alloc_contig_dump_pages(&cc->migratepages);
putback_movable_pages(&cc->migratepages);
return ret;
}
@@ -8503,7 +8728,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
* @end: one-past-the-last PFN to allocate
- * @migratetype: migratetype of the underlaying pageblocks (either
+ * @migratetype: migratetype of the underlying pageblocks (either
* #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
* in range must have the same migratetype and it must
* be either of the two.
@@ -8583,7 +8808,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
ret = __alloc_contig_migrate_range(&cc, start, end);
if (ret && ret != -EBUSY)
goto done;
- ret =0;
+ ret = 0;
/*
* Pages from [start, end) are within a MAX_ORDER_NR_PAGES
@@ -8602,8 +8827,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* isolated thus they won't get removed from buddy.
*/
- lru_add_drain_all();
-
order = 0;
outer_start = start;
while (!PageBuddy(pfn_to_page(outer_start))) {
@@ -8629,8 +8852,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, 0)) {
- pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
- __func__, outer_start, end);
ret = -EBUSY;
goto done;
}
@@ -8680,12 +8901,6 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
if (PageReserved(page))
return false;
-
- if (page_count(page) > 0)
- return false;
-
- if (PageHuge(page))
- return false;
}
return true;
}
@@ -8757,9 +8972,9 @@ struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
}
#endif /* CONFIG_CONTIG_ALLOC */
-void free_contig_range(unsigned long pfn, unsigned int nr_pages)
+void free_contig_range(unsigned long pfn, unsigned long nr_pages)
{
- unsigned int count = 0;
+ unsigned long count = 0;
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
@@ -8767,13 +8982,13 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
count += page_count(page) != 1;
__free_page(page);
}
- WARN(count != 0, "%d pages are still in use!\n", count);
+ WARN(count != 0, "%lu pages are still in use!\n", count);
}
EXPORT_SYMBOL(free_contig_range);
/*
* The zone indicated has a new number of managed_pages; batch sizes and percpu
- * page high values need to be recalulated.
+ * page high values need to be recalculated.
*/
void __meminit zone_pcp_update(struct zone *zone)
{
@@ -8805,12 +9020,9 @@ void zone_pcp_enable(struct zone *zone)
void zone_pcp_reset(struct zone *zone)
{
- unsigned long flags;
int cpu;
struct per_cpu_pageset *pset;
- /* avoid races with drain_pages() */
- local_irq_save(flags);
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
@@ -8819,7 +9031,6 @@ void zone_pcp_reset(struct zone *zone)
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
- local_irq_restore(flags);
}
#ifdef CONFIG_MEMORY_HOTREMOVE