path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  248
1 file changed, 89 insertions, 159 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e3758a09a009..dd886fac451a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -205,7 +205,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
};
int min_free_kbytes = 1024;
-int user_min_free_kbytes = -1;
+int user_min_free_kbytes;
static unsigned long __meminitdata nr_kernel_pages;
static unsigned long __meminitdata nr_all_pages;
@@ -234,8 +234,8 @@ int page_group_by_mobility_disabled __read_mostly;
void set_pageblock_migratetype(struct page *page, int migratetype)
{
- if (unlikely(page_group_by_mobility_disabled &&
- migratetype < MIGRATE_PCPTYPES))
+
+ if (unlikely(page_group_by_mobility_disabled))
migratetype = MIGRATE_UNMOVABLE;
set_pageblock_flags_group(page, (unsigned long)migratetype,
@@ -295,7 +295,7 @@ static inline int bad_range(struct zone *zone, struct page *page)
}
#endif
-static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
+static void bad_page(struct page *page)
{
static unsigned long resume;
static unsigned long nr_shown;
@@ -329,7 +329,7 @@ static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
current->comm, page_to_pfn(page));
- dump_page_badflags(page, reason, bad_flags);
+ dump_page(page);
print_modules();
dump_stack();
@@ -383,7 +383,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
int bad = 0;
if (unlikely(compound_order(page) != order)) {
- bad_page(page, "wrong compound order", 0);
+ bad_page(page);
bad++;
}
@@ -392,11 +392,8 @@ static int destroy_compound_page(struct page *page, unsigned long order)
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
- if (unlikely(!PageTail(p))) {
- bad_page(page, "PageTail not set", 0);
- bad++;
- } else if (unlikely(p->first_page != page)) {
- bad_page(page, "first_page not consistent", 0);
+ if (unlikely(!PageTail(p) || (p->first_page != page))) {
+ bad_page(page);
bad++;
}
__ClearPageTail(p);
@@ -509,12 +506,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
return 0;
if (page_is_guard(buddy) && page_order(buddy) == order) {
- VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+ VM_BUG_ON(page_count(buddy) != 0);
return 1;
}
if (PageBuddy(buddy) && page_order(buddy) == order) {
- VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+ VM_BUG_ON(page_count(buddy) != 0);
return 1;
}
return 0;
@@ -564,8 +561,8 @@ static inline void __free_one_page(struct page *page,
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
- VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
- VM_BUG_ON_PAGE(bad_range(zone, page), page);
+ VM_BUG_ON(page_idx & ((1 << order) - 1));
+ VM_BUG_ON(bad_range(zone, page));
while (order < MAX_ORDER-1) {
buddy_idx = __find_buddy_index(page_idx, order);
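
[Note: in this era of the allocator, __find_buddy_index() used above boils down to flipping the bit for the current order, so the buddy of a block and the merged higher-order block both fall out of simple bit operations. The standalone sketch below uses invented helper names and a toy index purely for illustration; it is not kernel code.]

#include <stdio.h>

/*
 * Toy illustration: the buddy of a block at index page_idx with size
 * 2^order differs from it only in bit 'order', so flipping that bit
 * finds the buddy, and ANDing the two indices gives the start of the
 * merged, higher-order block.
 */
static unsigned long find_buddy_index(unsigned long page_idx, unsigned int order)
{
	return page_idx ^ (1UL << order);
}

int main(void)
{
	unsigned long page_idx = 8;	/* hypothetical order-1 block at index 8 */
	unsigned int order = 1;

	unsigned long buddy = find_buddy_index(page_idx, order);
	unsigned long combined = page_idx & buddy;	/* merged order-2 block */

	printf("buddy of %lu at order %u is %lu, merged block starts at %lu\n",
	       page_idx, order, buddy, combined);
	return 0;
}
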
@@ -621,26 +618,15 @@ out:
static inline int free_pages_check(struct page *page)
{
- char *bad_reason = NULL;
- unsigned long bad_flags = 0;
-
- if (unlikely(page_mapcount(page)))
- bad_reason = "nonzero mapcount";
- if (unlikely(page->mapping != NULL))
- bad_reason = "non-NULL mapping";
- if (unlikely(atomic_read(&page->_count) != 0))
- bad_reason = "nonzero _count";
- if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
- bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
- bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
- }
- if (unlikely(mem_cgroup_bad_page_check(page)))
- bad_reason = "cgroup check failed";
- if (unlikely(bad_reason)) {
- bad_page(page, bad_reason, bad_flags);
+ if (unlikely(page_mapcount(page) |
+ (page->mapping != NULL) |
+ (atomic_read(&page->_count) != 0) |
+ (page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
+ (mem_cgroup_bad_page_check(page)))) {
+ bad_page(page);
return 1;
}
- page_cpupid_reset_last(page);
+ page_nid_reset_last(page);
if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
return 0;
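
[Note: the consolidated free_pages_check() above folds several independent sanity checks into one bitwise OR so the common (good-page) case costs a single conditional branch, at the price of no longer reporting which check failed. A minimal userspace sketch of that pattern follows; the struct and flag names are made up for illustration.]

#include <stdio.h>

/* Stand-in for struct page, for illustration only. */
struct fake_page {
	int mapcount;
	void *mapping;
	int count;
	unsigned long flags;
};

#define FLAGS_CHECK_AT_FREE 0x3UL	/* hypothetical "must be clear" bits */

/*
 * Same idea as the consolidated check: evaluate every condition
 * unconditionally, OR the results together, and branch once.
 */
static int page_looks_bad(const struct fake_page *page)
{
	return (page->mapcount |
		(page->mapping != NULL) |
		(page->count != 0) |
		(page->flags & FLAGS_CHECK_AT_FREE)) != 0;
}

int main(void)
{
	struct fake_page ok = { 0, NULL, 0, 0 };
	struct fake_page bad = { 0, NULL, 1, 0 };

	printf("ok: %d, bad: %d\n", page_looks_bad(&ok), page_looks_bad(&bad));
	return 0;
}
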
@@ -827,7 +813,7 @@ static inline void expand(struct zone *zone, struct page *page,
area--;
high--;
size >>= 1;
- VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
+ VM_BUG_ON(bad_range(zone, &page[size]));
#ifdef CONFIG_DEBUG_PAGEALLOC
if (high < debug_guardpage_minorder()) {
@@ -857,23 +843,12 @@ static inline void expand(struct zone *zone, struct page *page,
*/
static inline int check_new_page(struct page *page)
{
- char *bad_reason = NULL;
- unsigned long bad_flags = 0;
-
- if (unlikely(page_mapcount(page)))
- bad_reason = "nonzero mapcount";
- if (unlikely(page->mapping != NULL))
- bad_reason = "non-NULL mapping";
- if (unlikely(atomic_read(&page->_count) != 0))
- bad_reason = "nonzero _count";
- if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
- bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
- bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
- }
- if (unlikely(mem_cgroup_bad_page_check(page)))
- bad_reason = "cgroup check failed";
- if (unlikely(bad_reason)) {
- bad_page(page, bad_reason, bad_flags);
+ if (unlikely(page_mapcount(page) |
+ (page->mapping != NULL) |
+ (atomic_read(&page->_count) != 0) |
+ (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
+ (mem_cgroup_bad_page_check(page)))) {
+ bad_page(page);
return 1;
}
return 0;
@@ -980,7 +955,7 @@ int move_freepages(struct zone *zone,
for (page = start_page; page <= end_page;) {
/* Make sure we are not inadvertently changing nodes */
- VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
+ VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
@@ -1052,10 +1027,6 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
{
int current_order = page_order(page);
- /*
- * When borrowing from MIGRATE_CMA, we need to release the excess
- * buddy pages to CMA itself.
- */
if (is_migrate_cma(fallback_type))
return fallback_type;
@@ -1120,11 +1091,21 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
list_del(&page->lru);
rmv_page_order(page);
+ /*
+ * Borrow the excess buddy pages as well, irrespective
+ * of whether we stole freepages, or took ownership of
+ * the pageblock or not.
+ *
+ * Exception: When borrowing from MIGRATE_CMA, release
+ * the excess buddy pages to CMA itself.
+ */
expand(zone, page, order, current_order, area,
- new_type);
+ is_migrate_cma(migratetype)
+ ? migratetype : start_migratetype);
- trace_mm_page_alloc_extfrag(page, order, current_order,
- start_migratetype, migratetype, new_type);
+ trace_mm_page_alloc_extfrag(page, order,
+ current_order, start_migratetype, migratetype,
+ new_type == start_migratetype);
return page;
}
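
[Note: the expand() call above splits the larger block that was taken off a fallback free list down to the requested order, handing the unused upper halves back to the free lists of the migratetype being charged (with the CMA exception spelled out in the comment). A rough, self-contained analogue of that splitting loop, using invented types and plain per-order counters instead of list heads, is:]

#include <stdio.h>

#define MAX_ORDER 11

/* Toy free-area bookkeeping: just count blocks per order/migratetype. */
enum { MT_UNMOVABLE, MT_MOVABLE, MT_CMA, NR_MT };
static unsigned long nr_free[MAX_ORDER][NR_MT];

/*
 * Rough analogue of expand(): an order-'high' block was taken but only
 * order-'low' is needed, so repeatedly halve it and return the upper
 * halves to the free lists at decreasing orders, charged to 'mt'.
 */
static void expand_block(unsigned int low, unsigned int high, int mt)
{
	while (high > low) {
		high--;
		nr_free[high][mt]++;	/* unused buddy goes back on the list */
	}
}

int main(void)
{
	/* Borrow an order-5 block to satisfy an order-2 request. */
	expand_block(2, 5, MT_MOVABLE);

	for (unsigned int order = 0; order < 6; order++)
		printf("order %u: %lu free movable blocks\n",
		       order, nr_free[order][MT_MOVABLE]);
	return 0;
}
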
@@ -1429,8 +1410,8 @@ void split_page(struct page *page, unsigned int order)
{
int i;
- VM_BUG_ON_PAGE(PageCompound(page), page);
- VM_BUG_ON_PAGE(!page_count(page), page);
+ VM_BUG_ON(PageCompound(page));
+ VM_BUG_ON(!page_count(page));
#ifdef CONFIG_KMEMCHECK
/*
@@ -1577,7 +1558,7 @@ again:
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);
- VM_BUG_ON_PAGE(bad_range(zone, page), page);
+ VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
goto again;
return page;
@@ -1730,7 +1711,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
* comments in mmzone.h. Reduces cache footprint of zonelist scans
* that have to skip over a lot of full or unallowed zones.
*
- * If the zonelist cache is present in the passed zonelist, then
+ * If the zonelist cache is present in the passed in zonelist, then
* returns a pointer to the allowed node mask (either the current
* tasks mems_allowed, or node_states[N_MEMORY].)
*
@@ -1841,7 +1822,7 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
static bool zone_local(struct zone *local_zone, struct zone *zone)
{
- return local_zone->node == zone->node;
+ return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE;
}
static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
@@ -1938,17 +1919,18 @@ zonelist_scan:
* page was allocated in should have no effect on the
* time the page has in memory before being reclaimed.
*
- * Try to stay in local zones in the fastpath. If
- * that fails, the slowpath is entered, which will do
- * another pass starting with the local zones, but
- * ultimately fall back to remote zones that do not
- * partake in the fairness round-robin cycle of this
- * zonelist.
+ * When zone_reclaim_mode is enabled, try to stay in
+ * local zones in the fastpath. If that fails, the
+ * slowpath is entered, which will do another pass
+ * starting with the local zones, but ultimately fall
+ * back to remote zones that do not partake in the
+ * fairness round-robin cycle of this zonelist.
*/
if (alloc_flags & ALLOC_WMARK_LOW) {
if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
continue;
- if (!zone_local(preferred_zone, zone))
+ if (zone_reclaim_mode &&
+ !zone_local(preferred_zone, zone))
continue;
}
/*
@@ -2097,6 +2079,13 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
return;
/*
+ * Walking all memory to count page types is very expensive and should
+ * be inhibited in non-blockable contexts.
+ */
+ if (!(gfp_mask & __GFP_WAIT))
+ filter |= SHOW_MEM_FILTER_PAGE_COUNT;
+
+ /*
* This documents exceptions given to allocations in certain
* contexts that are allowed to allocate outside current's set
* of allowed nodes.
@@ -2260,7 +2249,10 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
preferred_zone, migratetype);
if (page) {
preferred_zone->compact_blockskip_flush = false;
- compaction_defer_reset(preferred_zone, order, true);
+ preferred_zone->compact_considered = 0;
+ preferred_zone->compact_defer_shift = 0;
+ if (order >= preferred_zone->compact_order_failed)
+ preferred_zone->compact_order_failed = order + 1;
count_vm_event(COMPACTSUCCESS);
return page;
}
@@ -2404,7 +2396,7 @@ static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
* thrash fairness information for zones that are not
* actually part of this zonelist's round-robin cycle.
*/
- if (!zone_local(preferred_zone, zone))
+ if (zone_reclaim_mode && !zone_local(preferred_zone, zone))
continue;
mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) -
@@ -2550,15 +2542,8 @@ rebalance:
}
/* Atomic allocations - we can't balance anything */
- if (!wait) {
- /*
- * All existing users of the deprecated __GFP_NOFAIL are
- * blockable, so warn of any new users that actually allow this
- * type of allocation to fail.
- */
- WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
+ if (!wait)
goto nopage;
- }
/* Avoid recursion of direct reclaim */
if (current->flags & PF_MEMALLOC)
@@ -2608,7 +2593,7 @@ rebalance:
* running out of options and have to consider going OOM
*/
if (!did_some_progress) {
- if (oom_gfp_allowed(gfp_mask)) {
+ if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
if (oom_killer_disabled)
goto nopage;
/* Coredumps can quickly deplete all memory reserves */
@@ -3896,6 +3881,8 @@ static inline unsigned long wait_table_bits(unsigned long size)
return ffz(~size);
}
+#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
/*
* Check if a pageblock contains reserved pages
*/
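
[Note: the LONG_ALIGN() macro added above rounds a byte count up to the next multiple of sizeof(long) using the usual add-then-mask trick for power-of-two alignment. A quick standalone check of how it behaves:]

#include <stdio.h>

#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

int main(void)
{
	/* With 8-byte longs, sizes 1..8 round up to 8, 9..16 to 16, etc. */
	for (size_t n = 1; n <= 10; n++)
		printf("LONG_ALIGN(%zu) = %zu\n", n, (size_t)LONG_ALIGN(n));
	return 0;
}
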
@@ -3923,7 +3910,6 @@ static void setup_zone_migrate_reserve(struct zone *zone)
struct page *page;
unsigned long block_migratetype;
int reserve;
- int old_reserve;
/*
* Get the start pfn, end pfn and the number of blocks to reserve
@@ -3945,12 +3931,6 @@ static void setup_zone_migrate_reserve(struct zone *zone)
* future allocation of hugepages at runtime.
*/
reserve = min(2, reserve);
- old_reserve = zone->nr_migrate_reserve_block;
-
- /* When memory hot-add, we almost always need to do nothing */
- if (reserve == old_reserve)
- return;
- zone->nr_migrate_reserve_block = reserve;
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
if (!pfn_valid(pfn))
@@ -3988,12 +3968,6 @@ static void setup_zone_migrate_reserve(struct zone *zone)
reserve--;
continue;
}
- } else if (!old_reserve) {
- /*
- * At boot time we don't need to scan the whole zone
- * for turning off MIGRATE_RESERVE.
- */
- break;
}
/*
@@ -4041,7 +4015,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
mminit_verify_page_links(page, zone, nid, pfn);
init_page_count(page);
page_mapcount_reset(page);
- page_cpupid_reset_last(page);
+ page_nid_reset_last(page);
SetPageReserved(page);
/*
* Mark the block movable so that blocks are reserved for
@@ -4244,6 +4218,7 @@ static noinline __init_refok
int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
{
int i;
+ struct pglist_data *pgdat = zone->zone_pgdat;
size_t alloc_size;
/*
@@ -4259,8 +4234,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
if (!slab_is_available()) {
zone->wait_table = (wait_queue_head_t *)
- memblock_virt_alloc_node_nopanic(
- alloc_size, zone->zone_pgdat->node_id);
+ alloc_bootmem_node_nopanic(pgdat, alloc_size);
} else {
/*
* This case means that a zone whose size was 0 gets new memory
@@ -4292,7 +4266,7 @@ static __meminit void zone_pcp_init(struct zone *zone)
*/
zone->pageset = &boot_pageset;
- if (populated_zone(zone))
+ if (zone->present_pages)
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
zone->name, zone->present_pages,
zone_batchsize(zone));
@@ -4380,14 +4354,13 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
#endif
/**
- * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
+ * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
* @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
- * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
+ * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
*
* If an architecture guarantees that all ranges registered with
* add_active_ranges() contain no holes and may be freed, this
- * this function may be used instead of calling memblock_free_early_nid()
- * manually.
+ * this function may be used instead of calling free_bootmem() manually.
*/
void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
{
@@ -4399,9 +4372,9 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
end_pfn = min(end_pfn, max_low_pfn);
if (start_pfn < end_pfn)
- memblock_free_early_nid(PFN_PHYS(start_pfn),
- (end_pfn - start_pfn) << PAGE_SHIFT,
- this_nid);
+ free_bootmem_node(NODE_DATA(this_nid),
+ PFN_PHYS(start_pfn),
+ (end_pfn - start_pfn) << PAGE_SHIFT);
}
}
@@ -4672,9 +4645,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
zone->pageblock_flags = NULL;
if (usemapsize)
- zone->pageblock_flags =
- memblock_virt_alloc_node_nopanic(usemapsize,
- pgdat->node_id);
+ zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
+ usemapsize);
}
#else
static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
@@ -4868,8 +4840,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
- map = memblock_virt_alloc_node_nopanic(size,
- pgdat->node_id);
+ map = alloc_bootmem_node_nopanic(pgdat, size);
pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -5050,33 +5021,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
nodemask_t saved_node_state = node_states[N_MEMORY];
unsigned long totalpages = early_calculate_totalpages();
int usable_nodes = nodes_weight(node_states[N_MEMORY]);
- struct memblock_type *type = &memblock.memory;
-
- /* Need to find movable_zone earlier when movable_node is specified. */
- find_usable_zone_for_movable();
-
- /*
- * If movable_node is specified, ignore kernelcore and movablecore
- * options.
- */
- if (movable_node_is_enabled()) {
- for (i = 0; i < type->cnt; i++) {
- if (!memblock_is_hotpluggable(&type->regions[i]))
- continue;
-
- nid = type->regions[i].nid;
-
- usable_startpfn = PFN_DOWN(type->regions[i].base);
- zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
- min(usable_startpfn, zone_movable_pfn[nid]) :
- usable_startpfn;
- }
-
- goto out2;
- }
/*
- * If movablecore=nn[KMG] was specified, calculate what size of
+ * If movablecore was specified, calculate what size of
* kernelcore that corresponds so that memory usable for
* any allocation type is evenly spread. If both kernelcore
* and movablecore are specified, then the value of kernelcore
@@ -5102,6 +5049,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
goto out;
/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
+ find_usable_zone_for_movable();
usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
restart:
@@ -5192,7 +5140,6 @@ restart:
if (usable_nodes && required_kernelcore > usable_nodes)
goto restart;
-out2:
/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
for (nid = 0; nid < MAX_NUMNODES; nid++)
zone_movable_pfn[nid] =
@@ -5213,7 +5160,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
struct zone *zone = &pgdat->node_zones[zone_type];
- if (populated_zone(zone)) {
+ if (zone->present_pages) {
node_set_state(nid, N_HIGH_MEMORY);
if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
zone_type <= ZONE_NORMAL)
@@ -5754,12 +5701,7 @@ module_init(init_per_zone_wmark_min)
int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- int rc;
-
- rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
- if (rc)
- return rc;
-
+ proc_dointvec(table, write, buffer, length, ppos);
if (write) {
user_min_free_kbytes = min_free_kbytes;
setup_per_zone_wmarks();
@@ -5924,7 +5866,7 @@ void *__init alloc_large_system_hash(const char *tablename,
do {
size = bucketsize << log2qty;
if (flags & HASH_EARLY)
- table = memblock_virt_alloc_nopanic(size, 0);
+ table = alloc_bootmem_nopanic(size);
else if (hashdist)
table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
else {
@@ -6026,7 +5968,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
pfn = page_to_pfn(page);
bitmap = get_pageblock_bitmap(zone, pfn);
bitidx = pfn_to_bitidx(zone, pfn);
- VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
+ VM_BUG_ON(!zone_spans_pfn(zone, pfn));
for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
if (flags & value)
@@ -6524,24 +6466,12 @@ static void dump_page_flags(unsigned long flags)
printk(")\n");
}
-void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
+void dump_page(struct page *page)
{
printk(KERN_ALERT
"page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
page, atomic_read(&page->_count), page_mapcount(page),
page->mapping, page->index);
dump_page_flags(page->flags);
- if (reason)
- pr_alert("page dumped because: %s\n", reason);
- if (page->flags & badflags) {
- pr_alert("bad because of flags:\n");
- dump_page_flags(page->flags & badflags);
- }
mem_cgroup_print_bad_page(page);
}
-
-void dump_page(struct page *page, char *reason)
-{
- dump_page_badflags(page, reason, 0);
-}
-EXPORT_SYMBOL_GPL(dump_page);