Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 248
1 file changed, 89 insertions(+), 159 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e3758a09a009..dd886fac451a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -205,7 +205,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 int min_free_kbytes = 1024;
-int user_min_free_kbytes = -1;
+int user_min_free_kbytes;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
@@ -234,8 +234,8 @@ int page_group_by_mobility_disabled __read_mostly;
 
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
-	if (unlikely(page_group_by_mobility_disabled &&
-		     migratetype < MIGRATE_PCPTYPES))
+
+	if (unlikely(page_group_by_mobility_disabled))
 		migratetype = MIGRATE_UNMOVABLE;
 
 	set_pageblock_flags_group(page, (unsigned long)migratetype,
@@ -295,7 +295,7 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
+static void bad_page(struct page *page)
 {
 	static unsigned long resume;
 	static unsigned long nr_shown;
@@ -329,7 +329,7 @@ static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
 
 	printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
 		current->comm, page_to_pfn(page));
-	dump_page_badflags(page, reason, bad_flags);
+	dump_page(page);
 
 	print_modules();
 	dump_stack();
@@ -383,7 +383,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	int bad = 0;
 
 	if (unlikely(compound_order(page) != order)) {
-		bad_page(page, "wrong compound order", 0);
+		bad_page(page);
 		bad++;
 	}
 
@@ -392,11 +392,8 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (unlikely(!PageTail(p))) {
-			bad_page(page, "PageTail not set", 0);
-			bad++;
-		} else if (unlikely(p->first_page != page)) {
-			bad_page(page, "first_page not consistent", 0);
+		if (unlikely(!PageTail(p) || (p->first_page != page))) {
+			bad_page(page);
 			bad++;
 		}
 		__ClearPageTail(p);
@@ -509,12 +506,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 		return 0;
 
 	if (page_is_guard(buddy) && page_order(buddy) == order) {
-		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+		VM_BUG_ON(page_count(buddy) != 0);
 		return 1;
 	}
 
 	if (PageBuddy(buddy) && page_order(buddy) == order) {
-		VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
+		VM_BUG_ON(page_count(buddy) != 0);
 		return 1;
 	}
 	return 0;
@@ -564,8 +561,8 @@ static inline void __free_one_page(struct page *page,
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
-	VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
-	VM_BUG_ON_PAGE(bad_range(zone, page), page);
+	VM_BUG_ON(page_idx & ((1 << order) - 1));
+	VM_BUG_ON(bad_range(zone, page));
 
 	while (order < MAX_ORDER-1) {
 		buddy_idx = __find_buddy_index(page_idx, order);
@@ -621,26 +618,15 @@ out:
 
 static inline int free_pages_check(struct page *page)
 {
-	char *bad_reason = NULL;
-	unsigned long bad_flags = 0;
-
-	if (unlikely(page_mapcount(page)))
-		bad_reason = "nonzero mapcount";
-	if (unlikely(page->mapping != NULL))
-		bad_reason = "non-NULL mapping";
-	if (unlikely(atomic_read(&page->_count) != 0))
-		bad_reason = "nonzero _count";
-	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
-		bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
-		bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
-	}
-	if (unlikely(mem_cgroup_bad_page_check(page)))
-		bad_reason = "cgroup check failed";
-	if (unlikely(bad_reason)) {
-		bad_page(page, bad_reason, bad_flags);
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL) |
+		(atomic_read(&page->_count) != 0) |
+		(page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
+		(mem_cgroup_bad_page_check(page)))) {
+		bad_page(page);
 		return 1;
 	}
-	page_cpupid_reset_last(page);
+	page_nid_reset_last(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -827,7 +813,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		area--;
 		high--;
 		size >>= 1;
-		VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
+		VM_BUG_ON(bad_range(zone, &page[size]));
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 		if (high < debug_guardpage_minorder()) {
@@ -857,23 +843,12 @@ static inline void expand(struct zone *zone, struct page *page,
  */
 static inline int check_new_page(struct page *page)
 {
-	char *bad_reason = NULL;
-	unsigned long bad_flags = 0;
-
-	if (unlikely(page_mapcount(page)))
-		bad_reason = "nonzero mapcount";
-	if (unlikely(page->mapping != NULL))
-		bad_reason = "non-NULL mapping";
-	if (unlikely(atomic_read(&page->_count) != 0))
-		bad_reason = "nonzero _count";
-	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
-		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
-		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
-	}
-	if (unlikely(mem_cgroup_bad_page_check(page)))
-		bad_reason = "cgroup check failed";
-	if (unlikely(bad_reason)) {
-		bad_page(page, bad_reason, bad_flags);
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL) |
+		(atomic_read(&page->_count) != 0) |
+		(page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
+		(mem_cgroup_bad_page_check(page)))) {
+		bad_page(page);
 		return 1;
 	}
 	return 0;
@@ -980,7 +955,7 @@ int move_freepages(struct zone *zone,
 
 	for (page = start_page; page <= end_page;) {
 		/* Make sure we are not inadvertently changing nodes */
-		VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
+		VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
 
 		if (!pfn_valid_within(page_to_pfn(page))) {
 			page++;
@@ -1052,10 +1027,6 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
 {
 	int current_order = page_order(page);
 
-	/*
-	 * When borrowing from MIGRATE_CMA, we need to release the excess
-	 * buddy pages to CMA itself.
-	 */
 	if (is_migrate_cma(fallback_type))
 		return fallback_type;
 
@@ -1120,11 +1091,21 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			list_del(&page->lru);
 			rmv_page_order(page);
 
+			/*
+			 * Borrow the excess buddy pages as well, irrespective
+			 * of whether we stole freepages, or took ownership of
+			 * the pageblock or not.
+			 *
+			 * Exception: When borrowing from MIGRATE_CMA, release
+			 * the excess buddy pages to CMA itself.
+			 */
 			expand(zone, page, order, current_order, area,
-			       new_type);
+			       is_migrate_cma(migratetype)
+			     ? migratetype : start_migratetype);
 
-			trace_mm_page_alloc_extfrag(page, order, current_order,
-				start_migratetype, migratetype, new_type);
+			trace_mm_page_alloc_extfrag(page, order,
+				current_order, start_migratetype, migratetype,
+				new_type == start_migratetype);
 
 			return page;
 		}
@@ -1429,8 +1410,8 @@ void split_page(struct page *page, unsigned int order)
 {
 	int i;
 
-	VM_BUG_ON_PAGE(PageCompound(page), page);
-	VM_BUG_ON_PAGE(!page_count(page), page);
+	VM_BUG_ON(PageCompound(page));
+	VM_BUG_ON(!page_count(page));
 
 #ifdef CONFIG_KMEMCHECK
 	/*
@@ -1577,7 +1558,7 @@ again:
 	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
 
-	VM_BUG_ON_PAGE(bad_range(zone, page), page);
+	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
 		goto again;
 	return page;
@@ -1730,7 +1711,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
 * comments in mmzone.h. Reduces cache footprint of zonelist scans
 * that have to skip over a lot of full or unallowed zones.
 *
- * If the zonelist cache is present in the passed zonelist, then
+ * If the zonelist cache is present in the passed in zonelist, then
 * returns a pointer to the allowed node mask (either the current
 * tasks mems_allowed, or node_states[N_MEMORY].)
 *
@@ -1841,7 +1822,7 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 
 static bool zone_local(struct zone *local_zone, struct zone *zone)
 {
-	return local_zone->node == zone->node;
+	return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE;
 }
 
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
@@ -1938,17 +1919,18 @@ zonelist_scan:
 		 * page was allocated in should have no effect on the
 		 * time the page has in memory before being reclaimed.
 		 *
-		 * Try to stay in local zones in the fastpath. If
-		 * that fails, the slowpath is entered, which will do
-		 * another pass starting with the local zones, but
-		 * ultimately fall back to remote zones that do not
-		 * partake in the fairness round-robin cycle of this
-		 * zonelist.
+		 * When zone_reclaim_mode is enabled, try to stay in
+		 * local zones in the fastpath. If that fails, the
+		 * slowpath is entered, which will do another pass
+		 * starting with the local zones, but ultimately fall
+		 * back to remote zones that do not partake in the
+		 * fairness round-robin cycle of this zonelist.
 		 */
 		if (alloc_flags & ALLOC_WMARK_LOW) {
 			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
 				continue;
-			if (!zone_local(preferred_zone, zone))
+			if (zone_reclaim_mode &&
+			    !zone_local(preferred_zone, zone))
 				continue;
 		}
 		/*
@@ -2097,6 +2079,13 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 		return;
 
 	/*
+	 * Walking all memory to count page types is very expensive and should
+	 * be inhibited in non-blockable contexts.
+	 */
+	if (!(gfp_mask & __GFP_WAIT))
+		filter |= SHOW_MEM_FILTER_PAGE_COUNT;
+
+	/*
 	 * This documents exceptions given to allocations in certain
 	 * contexts that are allowed to allocate outside current's set
 	 * of allowed nodes.
@@ -2260,7 +2249,10 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				preferred_zone, migratetype);
 		if (page) {
 			preferred_zone->compact_blockskip_flush = false;
-			compaction_defer_reset(preferred_zone, order, true);
+			preferred_zone->compact_considered = 0;
+			preferred_zone->compact_defer_shift = 0;
+			if (order >= preferred_zone->compact_order_failed)
+				preferred_zone->compact_order_failed = order + 1;
 			count_vm_event(COMPACTSUCCESS);
 			return page;
 		}
@@ -2404,7 +2396,7 @@ static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
 		 * thrash fairness information for zones that are not
 		 * actually part of this zonelist's round-robin cycle.
 		 */
-		if (!zone_local(preferred_zone, zone))
+		if (zone_reclaim_mode && !zone_local(preferred_zone, zone))
 			continue;
 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
 				    high_wmark_pages(zone) -
@@ -2550,15 +2542,8 @@ rebalance:
 	}
 
 	/* Atomic allocations - we can't balance anything */
-	if (!wait) {
-		/*
-		 * All existing users of the deprecated __GFP_NOFAIL are
-		 * blockable, so warn of any new users that actually allow this
-		 * type of allocation to fail.
-		 */
-		WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL);
+	if (!wait)
 		goto nopage;
-	}
 
 	/* Avoid recursion of direct reclaim */
 	if (current->flags & PF_MEMALLOC)
@@ -2608,7 +2593,7 @@ rebalance:
 	 * running out of options and have to consider going OOM
 	 */
 	if (!did_some_progress) {
-		if (oom_gfp_allowed(gfp_mask)) {
+		if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 			if (oom_killer_disabled)
 				goto nopage;
 			/* Coredumps can quickly deplete all memory reserves */
@@ -3896,6 +3881,8 @@ static inline unsigned long wait_table_bits(unsigned long size)
 	return ffz(~size);
 }
 
+#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
 /*
 * Check if a pageblock contains reserved pages
 */
@@ -3923,7 +3910,6 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	struct page *page;
 	unsigned long block_migratetype;
 	int reserve;
-	int old_reserve;
 
 	/*
 	 * Get the start pfn, end pfn and the number of blocks to reserve
@@ -3945,12 +3931,6 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	 * future allocation of hugepages at runtime.
 	 */
 	reserve = min(2, reserve);
-	old_reserve = zone->nr_migrate_reserve_block;
-
-	/* When memory hot-add, we almost always need to do nothing */
-	if (reserve == old_reserve)
-		return;
-	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
 			continue;
@@ -3988,12 +3968,6 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 				reserve--;
 				continue;
 			}
-		} else if (!old_reserve) {
-			/*
-			 * At boot time we don't need to scan the whole zone
-			 * for turning off MIGRATE_RESERVE.
-			 */
-			break;
 		}
 
 		/*
@@ -4041,7 +4015,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		page_mapcount_reset(page);
-		page_cpupid_reset_last(page);
+		page_nid_reset_last(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
@@ -4244,6 +4218,7 @@ static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int i;
+	struct pglist_data *pgdat = zone->zone_pgdat;
 	size_t alloc_size;
 
 	/*
@@ -4259,8 +4234,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			memblock_virt_alloc_node_nopanic(
-				alloc_size, zone->zone_pgdat->node_id);
+			alloc_bootmem_node_nopanic(pgdat, alloc_size);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -4292,7 +4266,7 @@ static __meminit void zone_pcp_init(struct zone *zone)
 	 */
 	zone->pageset = &boot_pageset;
 
-	if (populated_zone(zone))
+	if (zone->present_pages)
 		printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n",
 			zone->name, zone->present_pages,
 					 zone_batchsize(zone));
@@ -4380,14 +4354,13 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
 #endif
 
 /**
- * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
+ * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
- * @max_low_pfn: The highest PFN that will be passed to memblock_free_early_nid
+ * @max_low_pfn: The highest PFN that will be passed to free_bootmem_node
 *
 * If an architecture guarantees that all ranges registered with
 * add_active_ranges() contain no holes and may be freed, this
- * this function may be used instead of calling memblock_free_early_nid()
- * manually.
+ * this function may be used instead of calling free_bootmem() manually.
 */
 void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 {
@@ -4399,9 +4372,9 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 		end_pfn = min(end_pfn, max_low_pfn);
 
 		if (start_pfn < end_pfn)
-			memblock_free_early_nid(PFN_PHYS(start_pfn),
-					(end_pfn - start_pfn) << PAGE_SHIFT,
-					this_nid);
+			free_bootmem_node(NODE_DATA(this_nid),
+					  PFN_PHYS(start_pfn),
+					  (end_pfn - start_pfn) << PAGE_SHIFT);
 	}
 }
 
@@ -4672,9 +4645,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags =
-			memblock_virt_alloc_node_nopanic(usemapsize,
-							 pgdat->node_id);
+		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
+								   usemapsize);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
@@ -4868,8 +4840,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		size = (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = memblock_virt_alloc_node_nopanic(size,
-							       pgdat->node_id);
+			map = alloc_bootmem_node_nopanic(pgdat, size);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -5050,33 +5021,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
-	struct memblock_type *type = &memblock.memory;
-
-	/* Need to find movable_zone earlier when movable_node is specified. */
-	find_usable_zone_for_movable();
-
-	/*
-	 * If movable_node is specified, ignore kernelcore and movablecore
-	 * options.
-	 */
-	if (movable_node_is_enabled()) {
-		for (i = 0; i < type->cnt; i++) {
-			if (!memblock_is_hotpluggable(&type->regions[i]))
-				continue;
-
-			nid = type->regions[i].nid;
-
-			usable_startpfn = PFN_DOWN(type->regions[i].base);
-			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
-				min(usable_startpfn, zone_movable_pfn[nid]) :
-				usable_startpfn;
-		}
-
-		goto out2;
-	}
 
 	/*
-	 * If movablecore=nn[KMG] was specified, calculate what size of
+	 * If movablecore was specified, calculate what size of
 	 * kernelcore that corresponds so that memory usable for
 	 * any allocation type is evenly spread. If both kernelcore
 	 * and movablecore are specified, then the value of kernelcore
@@ -5102,6 +5049,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 		goto out;
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
+	find_usable_zone_for_movable();
 	usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -5192,7 +5140,6 @@ restart:
 	if (usable_nodes && required_kernelcore > usable_nodes)
 		goto restart;
 
-out2:
 	/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
@@ -5213,7 +5160,7 @@ static void check_for_memory(pg_data_t *pgdat, int nid)
 
 	for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
-		if (populated_zone(zone)) {
+		if (zone->present_pages) {
 			node_set_state(nid, N_HIGH_MEMORY);
 			if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
 			    zone_type <= ZONE_NORMAL)
@@ -5754,12 +5701,7 @@ module_init(init_per_zone_wmark_min)
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	int rc;
-
-	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (rc)
-		return rc;
-
+	proc_dointvec(table, write, buffer, length, ppos);
 	if (write) {
 		user_min_free_kbytes = min_free_kbytes;
 		setup_per_zone_wmarks();
@@ -5924,7 +5866,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 	do {
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY)
-			table = memblock_virt_alloc_nopanic(size, 0);
+			table = alloc_bootmem_nopanic(size);
 		else if (hashdist)
 			table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
 		else {
@@ -6026,7 +5968,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
-	VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
+	VM_BUG_ON(!zone_spans_pfn(zone, pfn));
 
 	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
 		if (flags & value)
@@ -6524,24 +6466,12 @@ static void dump_page_flags(unsigned long flags)
 	printk(")\n");
 }
 
-void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
+void dump_page(struct page *page)
 {
 	printk(KERN_ALERT
 	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
 		page, atomic_read(&page->_count), page_mapcount(page),
 		page->mapping, page->index);
 	dump_page_flags(page->flags);
-	if (reason)
-		pr_alert("page dumped because: %s\n", reason);
-	if (page->flags & badflags) {
-		pr_alert("bad because of flags:\n");
-		dump_page_flags(page->flags & badflags);
-	}
 	mem_cgroup_print_bad_page(page);
 }
-
-void dump_page(struct page *page, char *reason)
-{
-	dump_page_badflags(page, reason, 0);
-}
-EXPORT_SYMBOL_GPL(dump_page);