summary refs log tree commit diff
path: root/mm/sparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/sparse.c')
-rw-r--r-- mm/sparse.c 252
1 files changed, 138 insertions, 114 deletions
diff --git a/mm/sparse.c b/mm/sparse.c
index d21c6e5910d0..b5b2b6f7041b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -14,7 +14,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/bootmem_info.h>
-
+#include <linux/vmstat.h>
#include "internal.h"
#include <asm/dma.h>
@@ -43,11 +43,11 @@ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif
-int page_to_nid(const struct page *page)
+int memdesc_nid(memdesc_flags_t mdf)
{
- return section_to_node_table[page_to_section(page)];
+ return section_to_node_table[memdesc_section(mdf)];
}
-EXPORT_SYMBOL(page_to_nid);
+EXPORT_SYMBOL(memdesc_nid);
static void set_section_nid(unsigned long section_nr, int nid)
{
@@ -126,10 +126,10 @@ static inline int sparse_early_nid(struct mem_section *section)
}
/* Validate the physical addressing limitations of the model */
-void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
+static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
unsigned long *end_pfn)
{
- unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
+ unsigned long max_sparsemem_pfn = (DIRECT_MAP_PHYSMEM_END + 1) >> PAGE_SHIFT;
/*
* Sanity checks - do not allow an architecture to pass
@@ -170,12 +170,6 @@ static void __section_mark_present(struct mem_section *ms,
ms->section_mem_map |= SECTION_MARKED_PRESENT;
}
-#define for_each_present_section_nr(start, section_nr) \
- for (section_nr = next_present_section_nr(start-1); \
- ((section_nr != -1) && \
- (section_nr <= __highest_present_section_nr)); \
- section_nr = next_present_section_nr(section_nr))
-
static inline unsigned long first_present_section_nr(void)
{
return next_present_section_nr(-1);
@@ -193,13 +187,10 @@ static void subsection_mask_set(unsigned long *map, unsigned long pfn,
void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
{
- int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
- unsigned long nr, start_sec = pfn_to_section_nr(pfn);
-
- if (!nr_pages)
- return;
+ int end_sec_nr = pfn_to_section_nr(pfn + nr_pages - 1);
+ unsigned long nr, start_sec_nr = pfn_to_section_nr(pfn);
- for (nr = start_sec; nr <= end_sec; nr++) {
+ for (nr = start_sec_nr; nr <= end_sec_nr; nr++) {
struct mem_section *ms;
unsigned long pfns;
@@ -227,33 +218,20 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en
{
unsigned long pfn;
-#ifdef CONFIG_SPARSEMEM_EXTREME
- if (unlikely(!mem_section)) {
- unsigned long size, align;
-
- size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
- align = 1 << (INTERNODE_CACHE_SHIFT);
- mem_section = memblock_alloc(size, align);
- if (!mem_section)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, size, align);
- }
-#endif
-
start &= PAGE_SECTION_MASK;
mminit_validate_memmodel_limits(&start, &end);
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
- unsigned long section = pfn_to_section_nr(pfn);
+ unsigned long section_nr = pfn_to_section_nr(pfn);
struct mem_section *ms;
- sparse_index_init(section, nid);
- set_section_nid(section, nid);
+ sparse_index_init(section_nr, nid);
+ set_section_nid(section_nr, nid);
- ms = __nr_to_section(section);
+ ms = __nr_to_section(section_nr);
if (!ms->section_mem_map) {
ms->section_mem_map = sparse_encode_early_nid(nid) |
SECTION_IS_ONLINE;
- __section_mark_present(ms, section);
+ __section_mark_present(ms, section_nr);
}
}
}
@@ -268,6 +246,16 @@ static void __init memblocks_present(void)
unsigned long start, end;
int i, nid;
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (unlikely(!mem_section)) {
+ unsigned long size, align;
+
+ size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
+ align = 1 << (INTERNODE_CACHE_SHIFT);
+ mem_section = memblock_alloc_or_panic(size, align);
+ }
+#endif
+
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
memory_present(nid, start, end);
}
@@ -281,7 +269,7 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
{
unsigned long coded_mem_map =
(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
- BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
+ BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
return coded_mem_map;
}
@@ -318,6 +306,7 @@ size_t mem_section_usage_size(void)
return sizeof(struct mem_section_usage) + usemap_size();
}
+#ifdef CONFIG_MEMORY_HOTREMOVE
static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
{
#ifndef CONFIG_NUMA
@@ -328,7 +317,6 @@ static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
#endif
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
unsigned long size)
@@ -352,7 +340,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
again:
usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
if (!usage && limit) {
- limit = 0;
+ limit = MEMBLOCK_ALLOC_ACCESSIBLE;
goto again;
}
return usage;
@@ -415,19 +403,20 @@ static void __init check_usemap_section_nr(int nid,
#endif /* CONFIG_MEMORY_HOTREMOVE */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static unsigned long __init section_map_size(void)
+unsigned long __init section_map_size(void)
{
return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}
#else
-static unsigned long __init section_map_size(void)
+unsigned long __init section_map_size(void)
{
return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}
struct page __init *__populate_section_memmap(unsigned long pfn,
- unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
{
unsigned long size = section_map_size();
struct page *map = sparse_buffer_alloc(size);
@@ -498,6 +487,44 @@ void __weak __meminit vmemmap_populate_print_last(void)
{
}
+static void *sparse_usagebuf __meminitdata;
+static void *sparse_usagebuf_end __meminitdata;
+
+/*
+ * Helper function that is used for generic section initialization, and
+ * can also be used by any hooks added above.
+ */
+void __init sparse_init_early_section(int nid, struct page *map,
+ unsigned long pnum, unsigned long flags)
+{
+ BUG_ON(!sparse_usagebuf || sparse_usagebuf >= sparse_usagebuf_end);
+ check_usemap_section_nr(nid, sparse_usagebuf);
+ sparse_init_one_section(__nr_to_section(pnum), pnum, map,
+ sparse_usagebuf, SECTION_IS_EARLY | flags);
+ sparse_usagebuf = (void *)sparse_usagebuf + mem_section_usage_size();
+}
+
+static int __init sparse_usage_init(int nid, unsigned long map_count)
+{
+ unsigned long size;
+
+ size = mem_section_usage_size() * map_count;
+ sparse_usagebuf = sparse_early_usemaps_alloc_pgdat_section(
+ NODE_DATA(nid), size);
+ if (!sparse_usagebuf) {
+ sparse_usagebuf_end = NULL;
+ return -ENOMEM;
+ }
+
+ sparse_usagebuf_end = sparse_usagebuf + size;
+ return 0;
+}
+
+static void __init sparse_usage_fini(void)
+{
+ sparse_usagebuf = sparse_usagebuf_end = NULL;
+}
+
/*
* Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
* And number of present sections in this node is map_count.
@@ -506,47 +533,56 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
unsigned long pnum_end,
unsigned long map_count)
{
- struct mem_section_usage *usage;
unsigned long pnum;
struct page *map;
+ struct mem_section *ms;
- usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
- mem_section_usage_size() * map_count);
- if (!usage) {
+ if (sparse_usage_init(nid, map_count)) {
pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
goto failed;
}
+
sparse_buffer_init(map_count * section_map_size(), nid);
+
+ sparse_vmemmap_init_nid_early(nid);
+
for_each_present_section_nr(pnum_begin, pnum) {
unsigned long pfn = section_nr_to_pfn(pnum);
if (pnum >= pnum_end)
break;
- map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
- nid, NULL);
- if (!map) {
- pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
- __func__, nid);
- pnum_begin = pnum;
- sparse_buffer_fini();
- goto failed;
+ ms = __nr_to_section(pnum);
+ if (!preinited_vmemmap_section(ms)) {
+ map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
+ nid, NULL, NULL);
+ if (!map) {
+ pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
+ __func__, nid);
+ pnum_begin = pnum;
+ sparse_usage_fini();
+ sparse_buffer_fini();
+ goto failed;
+ }
+ memmap_boot_pages_add(DIV_ROUND_UP(PAGES_PER_SECTION * sizeof(struct page),
+ PAGE_SIZE));
+ sparse_init_early_section(nid, map, pnum, 0);
}
- check_usemap_section_nr(nid, usage);
- sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
- SECTION_IS_EARLY);
- usage = (void *) usage + mem_section_usage_size();
}
+ sparse_usage_fini();
sparse_buffer_fini();
return;
failed:
- /* We failed to allocate, mark all the following pnums as not present */
+ /*
+ * We failed to allocate, mark all the following pnums as not present,
+ * except the ones already initialized earlier.
+ */
for_each_present_section_nr(pnum_begin, pnum) {
- struct mem_section *ms;
-
if (pnum >= pnum_end)
break;
ms = __nr_to_section(pnum);
+ if (!preinited_vmemmap_section(ms))
+ ms->section_mem_map = 0;
ms->section_mem_map = 0;
}
}
@@ -560,6 +596,8 @@ void __init sparse_init(void)
unsigned long pnum_end, pnum_begin, map_count = 1;
int nid_begin;
+ /* see include/linux/mmzone.h 'struct mem_section' definition */
+ BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
memblocks_present();
pnum_begin = first_present_section_nr();
@@ -629,9 +667,10 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static struct page * __meminit populate_section_memmap(unsigned long pfn,
- unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
{
- return __populate_section_memmap(pfn, nr_pages, nid, altmap);
+ return __populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
}
static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
@@ -699,8 +738,9 @@ static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
return rc;
}
#else
-struct page * __meminit populate_section_memmap(unsigned long pfn,
- unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+static struct page * __meminit populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
{
return kvmalloc_node(array_size(sizeof(struct page),
PAGES_PER_SECTION), GFP_KERNEL, nid);
@@ -715,19 +755,19 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
static void free_map_bootmem(struct page *memmap)
{
unsigned long maps_section_nr, removing_section_nr, i;
- unsigned long magic, nr_pages;
+ unsigned long type, nr_pages;
struct page *page = virt_to_page(memmap);
nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
- magic = page->index;
+ type = bootmem_type(page);
- BUG_ON(magic == NODE_INFO);
+ BUG_ON(type == NODE_INFO);
maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
- removing_section_nr = page_private(page);
+ removing_section_nr = bootmem_info(page);
/*
* When this function is called, the removing section is
@@ -790,6 +830,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
unsigned long section_nr = pfn_to_section_nr(pfn);
/*
+ * Mark the section invalid so that valid_section()
+ * returns false. This prevents code from dereferencing
+ * the ms->usage array.
+ */
+ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
+
+ /*
* When removing an early section, the usage map is kept (as the
* usage maps of other sections fall into the same page). It
* will be re-used when re-adding the section - which is then no
@@ -797,38 +844,37 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
* was allocated during boot.
*/
if (!PageReserved(virt_to_page(ms->usage))) {
- kfree(ms->usage);
- ms->usage = NULL;
+ kfree_rcu(ms->usage, rcu);
+ WRITE_ONCE(ms->usage, NULL);
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- /*
- * Mark the section invalid so that valid_section()
- * return false. This prevents code from dereferencing
- * ms->usage array.
- */
- ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
}
/*
* The memmap of early sections is always fully populated. See
* section_activate() and pfn_valid() .
*/
- if (!section_is_early)
+ if (!section_is_early) {
+ memmap_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE)));
depopulate_section_memmap(pfn, nr_pages, altmap);
- else if (memmap)
+ } else if (memmap) {
+ memmap_boot_pages_add(-1L * (DIV_ROUND_UP(nr_pages * sizeof(struct page),
+ PAGE_SIZE)));
free_map_bootmem(memmap);
+ }
if (empty)
ms->section_mem_map = (unsigned long)NULL;
}
static struct page * __meminit section_activate(int nid, unsigned long pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+ unsigned long nr_pages, struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
{
struct mem_section *ms = __pfn_to_section(pfn);
struct mem_section_usage *usage = NULL;
struct page *memmap;
- int rc = 0;
+ int rc;
if (!ms->usage) {
usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
@@ -855,11 +901,12 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
if (nr_pages < PAGES_PER_SECTION && early_section(ms))
return pfn_to_page(pfn);
- memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
+ memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
if (!memmap) {
section_deactivate(pfn, nr_pages, altmap);
return ERR_PTR(-ENOMEM);
}
+ memmap_pages_add(DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE));
return memmap;
}
@@ -869,7 +916,8 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
* @nid: The node to add section on
* @start_pfn: start pfn of the memory range
* @nr_pages: number of pfns to add in the section
- * @altmap: device page map
+ * @altmap: alternate pfns to allocate the memmap backing store
+ * @pgmap: alternate compound page geometry for devmap mappings
*
* This is only intended for hotplug.
*
@@ -883,7 +931,8 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
* * -ENOMEM - Out of memory.
*/
int __meminit sparse_add_section(int nid, unsigned long start_pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+ unsigned long nr_pages, struct vmem_altmap *altmap,
+ struct dev_pagemap *pgmap)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
struct mem_section *ms;
@@ -894,7 +943,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
if (ret < 0)
return ret;
- memmap = section_activate(nid, start_pfn, nr_pages, altmap);
+ memmap = section_activate(nid, start_pfn, nr_pages, altmap, pgmap);
if (IS_ERR(memmap))
return PTR_ERR(memmap);
@@ -916,39 +965,14 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
return 0;
}
-#ifdef CONFIG_MEMORY_FAILURE
-static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
{
- int i;
+ struct mem_section *ms = __pfn_to_section(pfn);
- /*
- * A further optimization is to have per section refcounted
- * num_poisoned_pages. But that would need more space per memmap, so
- * for now just do a quick global check to speed up this routine in the
- * absence of bad pages.
- */
- if (atomic_long_read(&num_poisoned_pages) == 0)
+ if (WARN_ON_ONCE(!valid_section(ms)))
return;
- for (i = 0; i < nr_pages; i++) {
- if (PageHWPoison(&memmap[i])) {
- num_poisoned_pages_dec();
- ClearPageHWPoison(&memmap[i]);
- }
- }
-}
-#else
-static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-}
-#endif
-
-void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
- unsigned long nr_pages, unsigned long map_offset,
- struct vmem_altmap *altmap)
-{
- clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
- nr_pages - map_offset);
section_deactivate(pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */