diff options
Diffstat (limited to 'mm/sparse.c')
-rw-r--r-- | mm/sparse.c | 340 |
1 files changed, 175 insertions, 165 deletions
diff --git a/mm/sparse.c b/mm/sparse.c index 93250207c5cf..4ac1d7ef548f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -6,7 +6,7 @@ #include <linux/mmzone.h> #include <linux/bootmem.h> #include <linux/highmem.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/vmalloc.h> #include "internal.h" @@ -40,7 +40,7 @@ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #endif -int page_to_nid(struct page *page) +int page_to_nid(const struct page *page) { return section_to_node_table[page_to_section(page)]; } @@ -65,24 +65,20 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid) if (slab_is_available()) { if (node_state(nid, N_HIGH_MEMORY)) - section = kmalloc_node(array_size, GFP_KERNEL, nid); + section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = kmalloc(array_size, GFP_KERNEL); - } else + section = kzalloc(array_size, GFP_KERNEL); + } else { section = alloc_bootmem_node(NODE_DATA(nid), array_size); - - if (section) - memset(section, 0, array_size); + } return section; } static int __meminit sparse_index_init(unsigned long section_nr, int nid) { - static DEFINE_SPINLOCK(index_init_lock); unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; - int ret = 0; if (mem_section[root]) return -EEXIST; @@ -90,21 +86,10 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) section = sparse_index_alloc(nid); if (!section) return -ENOMEM; - /* - * This lock keeps two different sections from - * reallocating for the same index - */ - spin_lock(&index_init_lock); - - if (mem_section[root]) { - ret = -EEXIST; - goto out; - } mem_section[root] = section; -out: - spin_unlock(&index_init_lock); - return ret; + + return 0; } #else /* !SPARSEMEM_EXTREME */ static inline int sparse_index_init(unsigned long section_nr, int nid) @@ -132,6 +117,8 @@ int __section_nr(struct mem_section* ms) break; } + VM_BUG_ON(root_nr == NR_SECTION_ROOTS); + return (root_nr * SECTIONS_PER_ROOT) + (ms - root); } @@ -273,10 +260,11 @@ static unsigned long *__kmalloc_section_usemap(void) #ifdef CONFIG_MEMORY_HOTREMOVE static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, - unsigned long count) + unsigned long size) { - unsigned long section_nr; - + unsigned long goal, limit; + unsigned long *p; + int nid; /* * A page may contain usemaps for other sections preventing the * page being freed and making a section unremovable while @@ -287,8 +275,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * from the same section as the pgdat where possible to avoid * this problem. */ - section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); - return alloc_bootmem_section(usemap_size() * count, section_nr); + goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); + limit = goal + (1UL << PA_SECTION_SHIFT); + nid = early_pfn_to_nid(goal >> PAGE_SHIFT); +again: + p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, + SMP_CACHE_BYTES, goal, limit); + if (!p && limit) { + limit = 0; + goto again; + } + return p; } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -332,9 +329,9 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) #else static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, - unsigned long count) + unsigned long size) { - return NULL; + return alloc_bootmem_node_nopanic(pgdat, size); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -342,40 +339,30 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) } #endif /* CONFIG_MEMORY_HOTREMOVE */ -static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, +static void __init sparse_early_usemaps_alloc_node(void *data, unsigned long pnum_begin, unsigned long pnum_end, unsigned long usemap_count, int nodeid) { void *usemap; unsigned long pnum; + unsigned long **usemap_map = (unsigned long **)data; int size = usemap_size(); usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), - usemap_count); - if (usemap) { - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { - if (!present_section_nr(pnum)) - continue; - usemap_map[pnum] = usemap; - usemap += size; - } + size * usemap_count); + if (!usemap) { + printk(KERN_WARNING "%s: allocation failed\n", __func__); return; } - usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); - if (usemap) { - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { - if (!present_section_nr(pnum)) - continue; - usemap_map[pnum] = usemap; - usemap += size; - check_usemap_section_nr(nodeid, usemap_map[pnum]); - } - return; + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + usemap_map[pnum] = usemap; + usemap += size; + check_usemap_section_nr(nodeid, usemap_map[pnum]); } - - printk(KERN_WARNING "%s: allocation failed\n", __func__); } #ifndef CONFIG_SPARSEMEM_VMEMMAP @@ -444,11 +431,12 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER -static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, +static void __init sparse_early_mem_maps_alloc_node(void *data, unsigned long pnum_begin, unsigned long pnum_end, unsigned long map_count, int nodeid) { + struct page **map_map = (struct page **)data; sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, map_count, nodeid); } @@ -474,41 +462,18 @@ void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) { } -/* - * Allocate the accumulated non-linear sections, allocate a mem_map - * for each and record the physical to section mapping. +/** + * alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap + * @map: usemap_map for pageblock flags or mmap_map for vmemmap */ -void __init sparse_init(void) +static void __init alloc_usemap_and_memmap(void (*alloc_func) + (void *, unsigned long, unsigned long, + unsigned long, int), void *data) { unsigned long pnum; - struct page *map; - unsigned long *usemap; - unsigned long **usemap_map; - int size; + unsigned long map_count; int nodeid_begin = 0; unsigned long pnum_begin = 0; - unsigned long usemap_count; -#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER - unsigned long map_count; - int size2; - struct page **map_map; -#endif - - /* - * map is using big page (aka 2M in x86 64 bit) - * usemap is less one page (aka 24 bytes) - * so alloc 2M (with 2M align) and 24 bytes in turn will - * make next 2M slip to one more 2M later. - * then in big system, the memory will have a lot of holes... - * here try to allocate 2M pages continously. - * - * powerpc need to call sparse_init_one_section right after each - * sparse_early_mem_map_alloc, so allocate usemap_map at first. - */ - size = sizeof(unsigned long *) * NR_MEM_SECTIONS; - usemap_map = alloc_bootmem(size); - if (!usemap_map) - panic("can not allocate usemap_map\n"); for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { struct mem_section *ms; @@ -520,7 +485,7 @@ void __init sparse_init(void) pnum_begin = pnum; break; } - usemap_count = 1; + map_count = 1; for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { struct mem_section *ms; int nodeid; @@ -530,61 +495,69 @@ void __init sparse_init(void) ms = __nr_to_section(pnum); nodeid = sparse_early_nid(ms); if (nodeid == nodeid_begin) { - usemap_count++; + map_count++; continue; } /* ok, we need to take cake of from pnum_begin to pnum - 1*/ - sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum, - usemap_count, nodeid_begin); + alloc_func(data, pnum_begin, pnum, + map_count, nodeid_begin); /* new start, update count etc*/ nodeid_begin = nodeid; pnum_begin = pnum; - usemap_count = 1; + map_count = 1; } /* ok, last chunk */ - sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, - usemap_count, nodeid_begin); + alloc_func(data, pnum_begin, NR_MEM_SECTIONS, + map_count, nodeid_begin); +} + +/* + * Allocate the accumulated non-linear sections, allocate a mem_map + * for each and record the physical to section mapping. + */ +void __init sparse_init(void) +{ + unsigned long pnum; + struct page *map; + unsigned long *usemap; + unsigned long **usemap_map; + int size; +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + int size2; + struct page **map_map; +#endif + + /* see include/linux/mmzone.h 'struct mem_section' definition */ + BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section))); + + /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */ + set_pageblock_order(); + + /* + * map is using big page (aka 2M in x86 64 bit) + * usemap is less one page (aka 24 bytes) + * so alloc 2M (with 2M align) and 24 bytes in turn will + * make next 2M slip to one more 2M later. + * then in big system, the memory will have a lot of holes... + * here try to allocate 2M pages continuously. + * + * powerpc need to call sparse_init_one_section right after each + * sparse_early_mem_map_alloc, so allocate usemap_map at first. + */ + size = sizeof(unsigned long *) * NR_MEM_SECTIONS; + usemap_map = alloc_bootmem(size); + if (!usemap_map) + panic("can not allocate usemap_map\n"); + alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node, + (void *)usemap_map); #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER size2 = sizeof(struct page *) * NR_MEM_SECTIONS; map_map = alloc_bootmem(size2); if (!map_map) panic("can not allocate map_map\n"); - - for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { - struct mem_section *ms; - - if (!present_section_nr(pnum)) - continue; - ms = __nr_to_section(pnum); - nodeid_begin = sparse_early_nid(ms); - pnum_begin = pnum; - break; - } - map_count = 1; - for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { - struct mem_section *ms; - int nodeid; - - if (!present_section_nr(pnum)) - continue; - ms = __nr_to_section(pnum); - nodeid = sparse_early_nid(ms); - if (nodeid == nodeid_begin) { - map_count++; - continue; - } - /* ok, we need to take cake of from pnum_begin to pnum - 1*/ - sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, - map_count, nodeid_begin); - /* new start, update count etc*/ - nodeid_begin = nodeid; - pnum_begin = pnum; - map_count = 1; - } - /* ok, last chunk */ - sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, - map_count, nodeid_begin); + alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node, + (void *)map_map); #endif for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { @@ -625,11 +598,20 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, } static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + unsigned long start = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + + vmemmap_free(start, end); } -static void free_map_bootmem(struct page *page, unsigned long nr_pages) +#ifdef CONFIG_MEMORY_HOTREMOVE +static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { + unsigned long start = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + + vmemmap_free(start, end); } +#endif /* CONFIG_MEMORY_HOTREMOVE */ #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) { @@ -648,7 +630,6 @@ static struct page *__kmalloc_section_memmap(unsigned long nr_pages) got_map_page: ret = (struct page *)pfn_to_kaddr(page_to_pfn(page)); got_map_ptr: - memset(ret, 0, memmap_size); return ret; } @@ -668,10 +649,12 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) get_order(sizeof(struct page) * nr_pages)); } -static void free_map_bootmem(struct page *page, unsigned long nr_pages) +#ifdef CONFIG_MEMORY_HOTREMOVE +static void free_map_bootmem(struct page *memmap, unsigned long nr_pages) { unsigned long maps_section_nr, removing_section_nr, i; unsigned long magic; + struct page *page = virt_to_page(memmap); for (i = 0; i < nr_pages; i++, page++) { magic = (unsigned long) page->lru.next; @@ -693,43 +676,9 @@ static void free_map_bootmem(struct page *page, unsigned long nr_pages) put_page_bootmem(page); } } +#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static void free_section_usemap(struct page *memmap, unsigned long *usemap) -{ - struct page *usemap_page; - unsigned long nr_pages; - - if (!usemap) - return; - - usemap_page = virt_to_page(usemap); - /* - * Check to see if allocation came from hot-plug-add - */ - if (PageSlab(usemap_page)) { - kfree(usemap); - if (memmap) - __kfree_section_memmap(memmap, PAGES_PER_SECTION); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) { - struct page *memmap_page; - memmap_page = virt_to_page(memmap); - - nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) - >> PAGE_SHIFT; - - free_map_bootmem(memmap_page, nr_pages); - } -} - /* * returns the number of sections whose mem_maps were properly * set. If this is <=0, then that means that the passed-in @@ -770,6 +719,8 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn, goto out; } + memset(memmap, 0, sizeof(struct page) * nr_pages); + ms->section_mem_map |= SECTION_MARKED_PRESENT; ret = sparse_init_one_section(ms, section_nr, memmap, usemap); @@ -783,11 +734,67 @@ out: return ret; } +#ifdef CONFIG_MEMORY_HOTREMOVE +#ifdef CONFIG_MEMORY_FAILURE +static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) +{ + int i; + + if (!memmap) + return; + + for (i = 0; i < PAGES_PER_SECTION; i++) { + if (PageHWPoison(&memmap[i])) { + atomic_long_sub(1, &num_poisoned_pages); + ClearPageHWPoison(&memmap[i]); + } + } +} +#else +static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) +{ +} +#endif + +static void free_section_usemap(struct page *memmap, unsigned long *usemap) +{ + struct page *usemap_page; + unsigned long nr_pages; + + if (!usemap) + return; + + usemap_page = virt_to_page(usemap); + /* + * Check to see if allocation came from hot-plug-add + */ + if (PageSlab(usemap_page) || PageCompound(usemap_page)) { + kfree(usemap); + if (memmap) + __kfree_section_memmap(memmap, PAGES_PER_SECTION); + return; + } + + /* + * The usemap came from bootmem. This is packed with other usemaps + * on the section which has pgdat at boot time. Just keep it as is now. + */ + + if (memmap) { + nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) + >> PAGE_SHIFT; + + free_map_bootmem(memmap, nr_pages); + } +} + void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) { struct page *memmap = NULL; - unsigned long *usemap = NULL; + unsigned long *usemap = NULL, flags; + struct pglist_data *pgdat = zone->zone_pgdat; + pgdat_resize_lock(pgdat, &flags); if (ms->section_mem_map) { usemap = ms->pageblock_flags; memmap = sparse_decode_mem_map(ms->section_mem_map, @@ -795,7 +802,10 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) ms->section_mem_map = 0; ms->pageblock_flags = NULL; } + pgdat_resize_unlock(pgdat, &flags); + clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION); free_section_usemap(memmap, usemap); } -#endif +#endif /* CONFIG_MEMORY_HOTREMOVE */ +#endif /* CONFIG_MEMORY_HOTPLUG */ |