summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-19 09:45:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-19 09:45:58 -0700
commit249be8511b269495bc95cb8bdfdd5840b2ba73c0 (patch)
tree6920bde053faa0284b52b2a9c9695f5516520377 /include
parent3bfe1fc46794631366faa3ef075e1b0ff7ba120a (diff)
parenteec4844fae7c033a0c1fc1eb3b8517aeb8b6cc49 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge yet more updates from Andrew Morton: "The rest of MM and a kernel-wide procfs cleanup. Summary of the more significant patches: - Patch series "mm/memory_hotplug: Factor out memory block devicehandling", v3. David Hildenbrand. Some spring-cleaning of the memory hotplug code, notably in drivers/base/memory.c - "mm: thp: fix false negative of shmem vma's THP eligibility". Yang Shi. Fix /proc/pid/smaps output for THP pages used in shmem. - "resource: fix locking in find_next_iomem_res()" + 1. Nadav Amit. Bugfix and speedup for kernel/resource.c - Patch series "mm: Further memory block device cleanups", David Hildenbrand. More spring-cleaning of the memory hotplug code. - Patch series "mm: Sub-section memory hotplug support". Dan Williams. Generalise the memory hotplug code so that pmem can use it more completely. Then remove the hacks from the libnvdimm code which were there to work around the memory-hotplug code's constraints. - "proc/sysctl: add shared variables for range check", Matteo Croce. We have about 250 instances of int zero; ... .extra1 = &zero, in the tree. This is a tree-wide sweep to make all those private "zero"s and "one"s use global variables. Alas, it isn't practical to make those two global integers const" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (38 commits) proc/sysctl: add shared variables for range check mm: migrate: remove unused mode argument mm/sparsemem: cleanup 'section number' data types libnvdimm/pfn: stop padding pmem namespaces to section alignment libnvdimm/pfn: fix fsdax-mode namespace info-block zero-fields mm/devm_memremap_pages: enable sub-section remap mm: document ZONE_DEVICE memory-model implications mm/sparsemem: support sub-section hotplug mm/sparsemem: prepare for sub-section ranges mm: kill is_dev_zone() helper mm/hotplug: kill is_dev_zone() usage in __remove_pages() mm/sparsemem: convert kmalloc_section_memmap() to populate_section_memmap() mm/hotplug: prepare shrink_{zone, pgdat}_span for sub-section removal mm/sparsemem: add helpers track active portions of a section at boot mm/sparsemem: introduce a SECTION_IS_EARLY flag mm/sparsemem: introduce struct mem_section_usage drivers/base/memory.c: get rid of find_memory_block_hinted() mm/memory_hotplug: move and simplify walk_memory_blocks() mm/memory_hotplug: rename walk_memory_range() and pass start+size instead of pfns mm: make register_mem_sect_under_node() static ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/huge_mm.h23
-rw-r--r--include/linux/memory.h11
-rw-r--r--include/linux/memory_hotplug.h19
-rw-r--r--include/linux/migrate.h3
-rw-r--r--include/linux/mm.h38
-rw-r--r--include/linux/mmzone.h88
-rw-r--r--include/linux/node.h14
-rw-r--r--include/linux/sysctl.h7
8 files changed, 130 insertions, 73 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7cd5c150c21d..45ede62aa85b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -121,6 +121,23 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
bool transparent_hugepage_enabled(struct vm_area_struct *vma);
+#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
+
+static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
+ unsigned long haddr)
+{
+ /* Don't have to check pgoff for anonymous vma */
+ if (!vma_is_anonymous(vma)) {
+ if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
+ (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
+ return false;
+ }
+
+ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+ return false;
+ return true;
+}
+
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
@@ -271,6 +288,12 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
return false;
}
+static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
+ unsigned long haddr)
+{
+ return false;
+}
+
static inline void prep_transhuge_page(struct page *page) {}
#define transparent_hugepage_flags 0UL
diff --git a/include/linux/memory.h b/include/linux/memory.h
index e1dc1bb2b787..02e633f3ede0 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -111,16 +111,15 @@ extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
extern int register_memory_isolate_notifier(struct notifier_block *nb);
extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
-int hotplug_memory_register(int nid, struct mem_section *section);
-#ifdef CONFIG_MEMORY_HOTREMOVE
-extern void unregister_memory_section(struct mem_section *);
-#endif
+int create_memory_block_devices(unsigned long start, unsigned long size);
+void remove_memory_block_devices(unsigned long start, unsigned long size);
extern int memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
extern int memory_isolate_notify(unsigned long val, void *v);
-extern struct memory_block *find_memory_block_hinted(struct mem_section *,
- struct memory_block *);
extern struct memory_block *find_memory_block(struct mem_section *);
+typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
+extern int walk_memory_blocks(unsigned long start, unsigned long size,
+ void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 988fde33cd7f..f46ea71b4ffd 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -123,20 +123,10 @@ static inline bool movable_node_is_enabled(void)
return movable_node_enabled;
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
extern void arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap);
extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
-#endif /* CONFIG_MEMORY_HOTREMOVE */
-
-/*
- * Do we want sysfs memblock files created. This will allow userspace to online
- * and offline memory explicitly. Lack of this bit means that the caller has to
- * call move_pfn_range_to_zone to finish the initialization.
- */
-
-#define MHP_MEMBLOCK_API (1<<0)
/* reasonably generic interface to expand the physical pages */
extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
@@ -350,17 +340,16 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
extern void __ref free_area_init_core_hotplug(int nid);
-extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
- void *arg, int (*func)(struct memory_block *, void *));
extern int __add_memory(int nid, u64 start, u64 size);
extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
extern bool is_memblock_offlined(struct memory_block *mem);
-extern int sparse_add_one_section(int nid, unsigned long start_pfn,
- struct vmem_altmap *altmap);
-extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
+extern int sparse_add_section(int nid, unsigned long pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void sparse_remove_section(struct mem_section *ms,
+ unsigned long pfn, unsigned long nr_pages,
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e13d9bf2f9a5..7f04754c7f2b 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -77,8 +77,7 @@ extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
- struct page *newpage, struct page *page, enum migrate_mode mode,
- int extra_count);
+ struct page *newpage, struct page *page, int extra_count);
#else
static inline void putback_movable_pages(struct list_head *l) {}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bd6512559bed..0334ca97c584 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -541,6 +541,23 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
vma->vm_ops = NULL;
}
+static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+{
+ return !vma->vm_ops;
+}
+
+#ifdef CONFIG_SHMEM
+/*
+ * The vma_is_shmem is not inline because it is used only by slow
+ * paths in userfault.
+ */
+bool vma_is_shmem(struct vm_area_struct *vma);
+#else
+static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+#endif
+
+int vma_is_stack_for_current(struct vm_area_struct *vma);
+
/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
@@ -1620,23 +1637,6 @@ int clear_page_dirty_for_io(struct page *page);
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
-static inline bool vma_is_anonymous(struct vm_area_struct *vma)
-{
- return !vma->vm_ops;
-}
-
-#ifdef CONFIG_SHMEM
-/*
- * The vma_is_shmem is not inline because it is used only by slow
- * paths in userfault.
- */
-bool vma_is_shmem(struct vm_area_struct *vma);
-#else
-static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
-#endif
-
-int vma_is_stack_for_current(struct vm_area_struct *vma);
-
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
@@ -2767,8 +2767,8 @@ static inline void print_vma_addr(char *prefix, unsigned long rip)
#endif
void *sparse_buffer_alloc(unsigned long size);
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
- struct vmem_altmap *altmap);
+struct page * __populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 70394cabaf4e..d77d717c620c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -855,18 +855,6 @@ static inline int local_memory_node(int node_id) { return node_id; };
*/
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
- return zone_idx(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
- return false;
-}
-#endif
-
/*
* Returns true if a zone has pages managed by the buddy allocator.
* All the reclaim decisions have to use this function rather than
@@ -1160,6 +1148,29 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
+#define SUBSECTION_SHIFT 21
+
+#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
+#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
+#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
+
+#if SUBSECTION_SHIFT > SECTION_SIZE_BITS
+#error Subsection size exceeds section size
+#else
+#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
+#endif
+
+#define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
+#define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
+
+struct mem_section_usage {
+ DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
+ /* See declaration of similar field in struct zone */
+ unsigned long pageblock_flags[0];
+};
+
+void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
+
struct page;
struct page_ext;
struct mem_section {
@@ -1177,8 +1188,7 @@ struct mem_section {
*/
unsigned long section_mem_map;
- /* See declaration of similar field in struct zone */
- unsigned long *pageblock_flags;
+ struct mem_section_usage *usage;
#ifdef CONFIG_PAGE_EXTENSION
/*
* If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
@@ -1209,6 +1219,11 @@ extern struct mem_section **mem_section;
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
+static inline unsigned long *section_to_usemap(struct mem_section *ms)
+{
+ return ms->usage->pageblock_flags;
+}
+
static inline struct mem_section *__nr_to_section(unsigned long nr)
{
#ifdef CONFIG_SPARSEMEM_EXTREME
@@ -1219,8 +1234,8 @@ static inline struct mem_section *__nr_to_section(unsigned long nr)
return NULL;
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
-extern int __section_nr(struct mem_section* ms);
-extern unsigned long usemap_size(void);
+extern unsigned long __section_nr(struct mem_section *ms);
+extern size_t mem_section_usage_size(void);
/*
* We use the lower bits of the mem_map pointer to store
@@ -1238,7 +1253,8 @@ extern unsigned long usemap_size(void);
#define SECTION_MARKED_PRESENT (1UL<<0)
#define SECTION_HAS_MEM_MAP (1UL<<1)
#define SECTION_IS_ONLINE (1UL<<2)
-#define SECTION_MAP_LAST_BIT (1UL<<3)
+#define SECTION_IS_EARLY (1UL<<3)
+#define SECTION_MAP_LAST_BIT (1UL<<4)
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
#define SECTION_NID_SHIFT 3
@@ -1264,6 +1280,11 @@ static inline int valid_section(struct mem_section *section)
return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
}
+static inline int early_section(struct mem_section *section)
+{
+ return (section && (section->section_mem_map & SECTION_IS_EARLY));
+}
+
static inline int valid_section_nr(unsigned long nr)
{
return valid_section(__nr_to_section(nr));
@@ -1291,14 +1312,42 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn)
return __nr_to_section(pfn_to_section_nr(pfn));
}
-extern int __highest_present_section_nr;
+extern unsigned long __highest_present_section_nr;
+
+static inline int subsection_map_index(unsigned long pfn)
+{
+ return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+ int idx = subsection_map_index(pfn);
+
+ return test_bit(idx, ms->usage->subsection_map);
+}
+#else
+static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+{
+ return 1;
+}
+#endif
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
static inline int pfn_valid(unsigned long pfn)
{
+ struct mem_section *ms;
+
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
- return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
+ ms = __nr_to_section(pfn_to_section_nr(pfn));
+ if (!valid_section(ms))
+ return 0;
+ /*
+ * Traditionally early sections always returned pfn_valid() for
+ * the entire section-sized span.
+ */
+ return early_section(ms) || pfn_section_valid(ms, pfn);
}
#endif
@@ -1330,6 +1379,7 @@ void sparse_init(void);
#define sparse_init() do {} while (0)
#define sparse_index_init(_sec, _nid) do {} while (0)
#define pfn_present pfn_valid
+#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
/*
diff --git a/include/linux/node.h b/include/linux/node.h
index 1a557c589ecb..4866f32a02d8 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -137,10 +137,7 @@ static inline int register_one_node(int nid)
extern void unregister_one_node(int nid);
extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
-extern int register_mem_sect_under_node(struct memory_block *mem_blk,
- void *arg);
-extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
- unsigned long phys_index);
+extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
@@ -171,15 +168,8 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
return 0;
}
-static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
- void *arg)
+static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
- return 0;
-}
-static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
- unsigned long phys_index)
-{
- return 0;
}
static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index aadd310769d0..6df477329b76 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -37,6 +37,13 @@ struct ctl_table_root;
struct ctl_table_header;
struct ctl_dir;
+/* Keep the same order as in fs/proc/proc_sysctl.c */
+#define SYSCTL_ZERO ((void *)&sysctl_vals[0])
+#define SYSCTL_ONE ((void *)&sysctl_vals[1])
+#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2])
+
+extern const int sysctl_vals[];
+
typedef int proc_handler (struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);