summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-11-01 07:41:06 +0100
committerIngo Molnar <mingo@kernel.org>2016-11-01 07:41:06 +0100
commit05b93c19d50af2bd0d30fc000d817418ae8d33f1 (patch)
treedbfbd3f3bc13789e6450112c2013d7a2b042bdb9 /mm
parent24d86f59093b0bcb3756cdf47f2db10ff4e90dbb (diff)
parent0c183d92b20b5c84ca655b45ef57b3318b83eb9e (diff)
Merge branch 'linus' into x86/asm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/gup.c3
-rw-r--r--mm/kmemleak.c7
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/memcontrol.c9
-rw-r--r--mm/memory_hotplug.c29
-rw-r--r--mm/mprotect.c1
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/page_alloc.c131
-rw-r--r--mm/slab.c45
-rw-r--r--mm/slab.h1
-rw-r--r--mm/util.c4
-rw-r--r--mm/vmscan.c2
14 files changed, 65 insertions, 177 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index be0ee11fa0d9..86e3e0e74d20 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -187,7 +187,7 @@ config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on ARCH_ENABLE_MEMORY_HOTPLUG
- depends on !KASAN
+ depends on COMPILE_TEST || !KASAN
config MEMORY_HOTPLUG_SPARSE
def_bool y
diff --git a/mm/filemap.c b/mm/filemap.c
index 849f459ad078..c7fe2f16503f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -790,9 +790,7 @@ EXPORT_SYMBOL(__page_cache_alloc);
*/
wait_queue_head_t *page_waitqueue(struct page *page)
{
- const struct zone *zone = page_zone(page);
-
- return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
+ return bit_waitqueue(page, 0);
}
EXPORT_SYMBOL(page_waitqueue);
diff --git a/mm/gup.c b/mm/gup.c
index 7aa113c2d373..ec4f82704b6f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -526,7 +526,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
*/
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
@@ -631,7 +631,6 @@ next_page:
} while (nr_pages);
return i;
}
-EXPORT_SYMBOL(__get_user_pages);
bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
{
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a5e453cf05c4..e5355a5b423f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1453,8 +1453,11 @@ static void kmemleak_scan(void)
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- scan_block(task_stack_page(p), task_stack_page(p) +
- THREAD_SIZE, NULL);
+ void *stack = try_get_task_stack(p);
+ if (stack) {
+ scan_block(stack, stack + THREAD_SIZE, NULL);
+ put_task_stack(p);
+ }
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 1d05cb9d363d..234676e31edd 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
err = memcg_init_list_lru(lru, memcg_aware);
if (err) {
kfree(lru->node);
+ /* Do this so a list_lru_destroy() doesn't crash: */
+ lru->node = NULL;
goto out;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae052b5e3315..0f870ba43942 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1917,6 +1917,15 @@ retry:
current->flags & PF_EXITING))
goto force;
+ /*
+ * Prevent unbounded recursion when reclaim operations need to
+ * allocate memory. This might exceed the limits temporarily,
+ * but we prefer facilitating memory reclaim and getting back
+ * under the limit over triggering OOM kills in these cases.
+ */
+ if (unlikely(current->flags & PF_MEMALLOC))
+ goto force;
+
if (unlikely(task_in_memcg_oom(current)))
goto nomem;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 962927309b6e..cad4b9125695 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
unsigned long i, pfn, end_pfn, nr_pages;
int node = pgdat->node_id;
struct page *page;
- struct zone *zone;
nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
page = virt_to_page(pgdat);
@@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
for (i = 0; i < nr_pages; i++, page++)
get_page_bootmem(node, page, NODE_INFO);
- zone = &pgdat->node_zones[0];
- for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
- if (zone_is_initialized(zone)) {
- nr_pages = zone->wait_table_hash_nr_entries
- * sizeof(wait_queue_head_t);
- nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
- page = virt_to_page(zone->wait_table);
-
- for (i = 0; i < nr_pages; i++, page++)
- get_page_bootmem(node, page, NODE_INFO);
- }
- }
-
pfn = pgdat->node_start_pfn;
end_pfn = pgdat_end_pfn(pgdat);
@@ -2131,7 +2117,6 @@ void try_offline_node(int nid)
unsigned long start_pfn = pgdat->node_start_pfn;
unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
unsigned long pfn;
- int i;
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -2158,20 +2143,6 @@ void try_offline_node(int nid)
*/
node_set_offline(nid);
unregister_one_node(nid);
-
- /* free waittable in each zone */
- for (i = 0; i < MAX_NR_ZONES; i++) {
- struct zone *zone = pgdat->node_zones + i;
-
- /*
- * wait_table may be allocated from boot memory,
- * here only free if it's allocated by vmalloc.
- */
- if (is_vmalloc_addr(zone->wait_table)) {
- vfree(zone->wait_table);
- zone->wait_table = NULL;
- }
- }
}
EXPORT_SYMBOL(try_offline_node);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index bcdbe62f3e6d..11936526b08b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -25,7 +25,6 @@
#include <linux/perf_event.h>
#include <linux/pkeys.h>
#include <linux/ksm.h>
-#include <linux/pkeys.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index db5fd1795298..8b8faaf2a9e9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -109,7 +109,7 @@ unsigned int kobjsize(const void *objp)
return PAGE_SIZE << compound_order(page);
}
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int foll_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f639739cab9..2dd614877de2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4224,7 +4224,7 @@ static void show_migration_types(unsigned char type)
}
*p = '\0';
- printk("(%s) ", tmp);
+ printk(KERN_CONT "(%s) ", tmp);
}
/*
@@ -4335,7 +4335,8 @@ void show_free_areas(unsigned int filter)
free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
show_node(zone);
- printk("%s"
+ printk(KERN_CONT
+ "%s"
" free:%lukB"
" min:%lukB"
" low:%lukB"
@@ -4382,8 +4383,8 @@ void show_free_areas(unsigned int filter)
K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
- printk(" %ld", zone->lowmem_reserve[i]);
- printk("\n");
+ printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+ printk(KERN_CONT "\n");
}
for_each_populated_zone(zone) {
@@ -4394,7 +4395,7 @@ void show_free_areas(unsigned int filter)
if (skip_free_areas_node(filter, zone_to_nid(zone)))
continue;
show_node(zone);
- printk("%s: ", zone->name);
+ printk(KERN_CONT "%s: ", zone->name);
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
@@ -4412,11 +4413,12 @@ void show_free_areas(unsigned int filter)
}
spin_unlock_irqrestore(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
- printk("%lu*%lukB ", nr[order], K(1UL) << order);
+ printk(KERN_CONT "%lu*%lukB ",
+ nr[order], K(1UL) << order);
if (nr[order])
show_migration_types(types[order]);
}
- printk("= %lukB\n", K(total));
+ printk(KERN_CONT "= %lukB\n", K(total));
}
hugetlb_show_meminfo();
@@ -4977,72 +4979,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
}
/*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE 256
-
-#ifndef CONFIG_MEMORY_HOTPLUG
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
- unsigned long size = 1;
-
- pages /= PAGES_PER_WAITQUEUE;
-
- while (size < pages)
- size <<= 1;
-
- /*
- * Once we have dozens or even hundreds of threads sleeping
- * on IO we've got bigger problems than wait queue collision.
- * Limit the size of the wait table to a reasonable size.
- */
- size = min(size, 4096UL);
-
- return max(size, 4UL);
-}
-#else
-/*
- * A zone's size might be changed by hot-add, so it is not possible to determine
- * a suitable size for its wait_table. So we use the maximum size now.
- *
- * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie:
- *
- * i386 (preemption config) : 4096 x 16 = 64Kbyte.
- * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
- * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte.
- *
- * The maximum entries are prepared when a zone's memory is (512K + 256) pages
- * or more by the traditional way. (See above). It equals:
- *
- * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte.
- * ia64(16K page size) : = ( 8G + 4M)byte.
- * powerpc (64K page size) : = (32G +16M)byte.
- */
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
- return 4096UL;
-}
-#endif
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
- return ffz(~size);
-}
-
-/*
* Initially all pages are reserved - free ones are freed
* up by free_all_bootmem() once the early boot process is
* done. Non-atomic initialization, single-pass.
@@ -5304,49 +5240,6 @@ void __init setup_per_cpu_pageset(void)
alloc_percpu(struct per_cpu_nodestat);
}
-static noinline __ref
-int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
- int i;
- size_t alloc_size;
-
- /*
- * The per-page waitqueue mechanism uses hashed waitqueues
- * per zone.
- */
- zone->wait_table_hash_nr_entries =
- wait_table_hash_nr_entries(zone_size_pages);
- zone->wait_table_bits =
- wait_table_bits(zone->wait_table_hash_nr_entries);
- alloc_size = zone->wait_table_hash_nr_entries
- * sizeof(wait_queue_head_t);
-
- if (!slab_is_available()) {
- zone->wait_table = (wait_queue_head_t *)
- memblock_virt_alloc_node_nopanic(
- alloc_size, zone->zone_pgdat->node_id);
- } else {
- /*
- * This case means that a zone whose size was 0 gets new memory
- * via memory hot-add.
- * But it may be the case that a new node was hot-added. In
- * this case vmalloc() will not be able to use this new node's
- * memory - this wait_table must be initialized to use this new
- * node itself as well.
- * To use this new node's memory, further consideration will be
- * necessary.
- */
- zone->wait_table = vmalloc(alloc_size);
- }
- if (!zone->wait_table)
- return -ENOMEM;
-
- for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
- init_waitqueue_head(zone->wait_table + i);
-
- return 0;
-}
-
static __meminit void zone_pcp_init(struct zone *zone)
{
/*
@@ -5367,10 +5260,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
unsigned long size)
{
struct pglist_data *pgdat = zone->zone_pgdat;
- int ret;
- ret = zone_wait_table_init(zone, size);
- if (ret)
- return ret;
+
pgdat->nr_zones = zone_idx(zone) + 1;
zone->zone_start_pfn = zone_start_pfn;
@@ -5382,6 +5272,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
zone_start_pfn, (zone_start_pfn + size));
zone_init_free_lists(zone);
+ zone->initialized = 1;
return 0;
}
diff --git a/mm/slab.c b/mm/slab.c
index 090fb26b3a39..0b0550ca85b4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
+ parent->num_slabs = 0;
}
#define MAKE_LIST(cachep, listp, slab, nodeid) \
@@ -966,7 +967,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
* guaranteed to be valid until irq is re-enabled, because it will be
* freed after synchronize_sched().
*/
- if (force_change)
+ if (old_shared && force_change)
synchronize_sched();
fail:
@@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
for_each_kmem_cache_node(cachep, node, n) {
unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
unsigned long active_slabs = 0, num_slabs = 0;
+ unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+ unsigned long num_slabs_full;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->slabs_full, lru) {
- active_objs += cachep->num;
- active_slabs++;
- }
+ num_slabs = n->num_slabs;
list_for_each_entry(page, &n->slabs_partial, lru) {
active_objs += page->active;
- active_slabs++;
+ num_slabs_partial++;
}
list_for_each_entry(page, &n->slabs_free, lru)
- num_slabs++;
+ num_slabs_free++;
free_objects += n->free_objects;
spin_unlock_irqrestore(&n->list_lock, flags);
- num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
+ active_slabs = num_slabs - num_slabs_free;
+ num_slabs_full = num_slabs -
+ (num_slabs_partial + num_slabs_free);
+ active_objs += (num_slabs_full * cachep->num);
+
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
node, active_slabs, num_slabs, active_objs, num_objs,
free_objects);
@@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache,
page = list_entry(p, struct page, lru);
list_del(&page->lru);
+ n->num_slabs--;
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
@@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
list_add_tail(&page->lru, &(n->slabs_free));
else
fixup_slab_list(cachep, n, page, &list);
+
+ n->num_slabs++;
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - page->active;
spin_unlock(&n->list_lock);
@@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
page = list_last_entry(&n->slabs_free, struct page, lru);
list_move(&page->lru, list);
+ n->num_slabs--;
}
}
@@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
unsigned long num_objs;
unsigned long active_slabs = 0;
unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+ unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+ unsigned long num_slabs_full = 0;
const char *name;
char *error = NULL;
int node;
@@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
check_irq_on();
spin_lock_irq(&n->list_lock);
- list_for_each_entry(page, &n->slabs_full, lru) {
- if (page->active != cachep->num && !error)
- error = "slabs_full accounting error";
- active_objs += cachep->num;
- active_slabs++;
- }
+ num_slabs += n->num_slabs;
+
list_for_each_entry(page, &n->slabs_partial, lru) {
if (page->active == cachep->num && !error)
error = "slabs_partial accounting error";
if (!page->active && !error)
error = "slabs_partial accounting error";
active_objs += page->active;
- active_slabs++;
+ num_slabs_partial++;
}
+
list_for_each_entry(page, &n->slabs_free, lru) {
if (page->active && !error)
error = "slabs_free accounting error";
- num_slabs++;
+ num_slabs_free++;
}
+
free_objects += n->free_objects;
if (n->shared)
shared_avail += n->shared->avail;
spin_unlock_irq(&n->list_lock);
}
- num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
+ active_slabs = num_slabs - num_slabs_free;
+ num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+ active_objs += (num_slabs_full * cachep->num);
+
if (num_objs - active_objs != free_objects && !error)
error = "free_objects accounting error";
diff --git a/mm/slab.h b/mm/slab.h
index 9653f2e2591a..bc05fdc3edce 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,6 +432,7 @@ struct kmem_cache_node {
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
+ unsigned long num_slabs;
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
diff --git a/mm/util.c b/mm/util.c
index 952cbe7ad7b7..1a41553db866 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,8 +230,10 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
}
/* Check if the vma is being used as a stack by this task */
-int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
+int vma_is_stack_for_current(struct vm_area_struct *vma)
{
+ struct task_struct * __maybe_unused t = current;
+
return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 744f926af442..76fda2268148 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.gfp_mask,
sc.reclaim_idx);
+ current->flags |= PF_MEMALLOC;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+ current->flags &= ~PF_MEMALLOC;
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);