summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/khugepaged.c15
-rw-r--r--mm/kmemleak.c12
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memcontrol.c6
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/page_owner.c6
-rw-r--r--mm/percpu-km.c8
-rw-r--r--mm/percpu-vm.c18
-rw-r--r--mm/percpu.c67
-rw-r--r--mm/shmem.c31
-rw-r--r--mm/slab.c1
-rw-r--r--mm/vmscan.c31
-rw-r--r--mm/vmstat.c2
16 files changed, 144 insertions, 122 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87ab9b8f56b5..5a68730eebd6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
VM_BUG_ON_PAGE(!PageCompound(page), page);
- if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+ true)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
}
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
- huge_gfp, &memcg, true))) {
+ huge_gfp | __GFP_NORETRY, &memcg, true))) {
put_page(new_page);
split_huge_pmd(vma, vmf->pmd, vmf->address);
if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
list_for_each_safe(pos, next, &list) {
page = list_entry((void *)pos, struct page, mapping);
- lock_page(page);
+ if (!trylock_page(page))
+ goto next;
/* split_huge_page() removes page from list on success */
if (!split_huge_page(page))
split++;
unlock_page(page);
+next:
put_page(page);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a963f2034dfc..976bbc5646fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
#include <linux/bootmem.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
+#include <linux/mmdebug.h>
#include <linux/sched/signal.h>
#include <linux/rmap.h>
#include <linux/string_helpers.h>
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
struct resv_map *resv_map;
long gbl_reserve;
+ /* This should never happen */
+ if (from > to) {
+ VM_WARN(1, "%s called with a negative range\n", __func__);
+ return -EINVAL;
+ }
+
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b7e2268dfc9a..e42568284e06 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out;
}
- VM_BUG_ON_PAGE(PageCompound(page), page);
+ /* TODO: teach khugepaged to collapse THP mapped with pte */
+ if (PageCompound(page)) {
+ result = SCAN_PAGE_COMPOUND;
+ goto out;
+ }
+
VM_BUG_ON_PAGE(!PageAnon(page), page);
/*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
goto out_nolock;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e83987c55a08..46c2290a08f1 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1657,8 +1657,7 @@ static void start_scan_thread(void)
}
/*
- * Stop the automatic memory scanning thread. This function must be called
- * with the scan_mutex held.
+ * Stop the automatic memory scanning thread.
*/
static void stop_scan_thread(void)
{
@@ -1921,12 +1920,15 @@ static void kmemleak_do_cleanup(struct work_struct *work)
{
stop_scan_thread();
+ mutex_lock(&scan_mutex);
/*
- * Once the scan thread has stopped, it is safe to no longer track
- * object freeing. Ordering of the scan thread stopping and the memory
- * accesses below is guaranteed by the kthread_stop() function.
+ * Once it is made sure that kmemleak_scan has stopped, it is safe to no
+ * longer track object freeing. Ordering of the scan thread stopping and
+ * the memory accesses below is guaranteed by the kthread_stop()
+ * function.
*/
kmemleak_free_enabled = 0;
+ mutex_unlock(&scan_mutex);
if (!kmemleak_found_leaks)
__kmemleak_do_cleanup();
diff --git a/mm/memblock.c b/mm/memblock.c
index b6ba6b7adadc..48376bd33274 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
*out_nid = r->nid;
}
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
- unsigned long max_pfn)
-{
- struct memblock_type *type = &memblock.memory;
- unsigned int right = type->cnt;
- unsigned int mid, left = 0;
- phys_addr_t addr = PFN_PHYS(++pfn);
-
- do {
- mid = (right + left) / 2;
-
- if (addr < type->regions[mid].base)
- right = mid;
- else if (addr >= (type->regions[mid].base +
- type->regions[mid].size))
- left = mid + 1;
- else {
- /* addr is within the region, so pfn is valid */
- return pfn;
- }
- } while (left < right);
-
- if (right == type->cnt)
- return -1UL;
- else
- return PHYS_PFN(type->regions[right].base);
-}
-
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 670e99b68aa6..9ec024b862ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -714,9 +714,9 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
* invocations for reference counting, or use mem_cgroup_iter_break()
* to cancel a hierarchy walk before the round-trip is complete.
*
- * Reclaimers can specify a zone and a priority level in @reclaim to
+ * Reclaimers can specify a node and a priority level in @reclaim to
* divide up the memcgs in the hierarchy among all concurrent
- * reclaimers operating on the same zone and priority.
+ * reclaimers operating on the same node and priority.
*/
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -2299,7 +2299,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep)
}
/**
- * memcg_kmem_charge: charge a kmem page
+ * memcg_kmem_charge_memcg: charge a kmem page
* @page: page to charge
* @gfp: reclaim mode
* @order: allocation order
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d879f1d8a44a..32cba0332787 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
case MPOL_INTERLEAVE:
return !!nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
+ /* a's ->flags is the same as b's */
+ if (a->flags & MPOL_F_LOCAL)
+ return true;
return a->v.preferred_node == b->v.preferred_node;
default:
BUG();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3d974cb2a1a1..1741dd23e7c1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
* Remove at a later date when no bug reports exist related to
* grouping pages by mobility
*/
- VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
+ VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
+ pfn_valid(page_to_pfn(end_page)) &&
+ page_zone(start_page) != page_zone(end_page));
#endif
if (num_movable)
@@ -3594,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
return false;
/* this guy won't enter reclaim */
- if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+ if (current->flags & PF_MEMALLOC)
return false;
/* We're only interested __GFP_FS allocations for now */
@@ -5354,22 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- /*
- * Skip to the pfn preceding the next valid one (or
- * end_pfn), such that we hit a valid pfn (or end_pfn)
- * on our next iteration of the loop. Note that it needs
- * to be pageblock aligned even when the region itself
- * is not. move_freepages_block() can shift ahead of
- * the valid region but still depends on correct page
- * metadata.
- */
- pfn = (memblock_next_valid_pfn(pfn, end_pfn) &
- ~(pageblock_nr_pages-1)) - 1;
-#endif
+ if (!early_pfn_valid(pfn))
continue;
- }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 9886c6073828..7172e0a80e13 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -123,13 +123,13 @@ void __reset_page_owner(struct page *page, unsigned int order)
static inline bool check_recursive_alloc(struct stack_trace *trace,
unsigned long ip)
{
- int i, count;
+ int i;
if (!trace->nr_entries)
return false;
- for (i = 0, count = 0; i < trace->nr_entries; i++) {
- if (trace->entries[i] == ip && ++count == 2)
+ for (i = 0; i < trace->nr_entries; i++) {
+ if (trace->entries[i] == ip)
return true;
}
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
#include <linux/log2.h>
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
{
return 0;
}
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
/* nada */
}
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
struct pcpu_chunk *chunk;
struct page *pages;
int i;
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;
- pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+ pages = alloc_pages(gfp, order_base_2(nr_pages));
if (!pages) {
pcpu_free_chunk(chunk);
return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
lockdep_assert_held(&pcpu_alloc_mutex);
if (!pages)
- pages = pcpu_mem_zalloc(pages_size);
+ pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
return pages;
}
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
* @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
* @page_start: page index of the first page to be allocated
* @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
*
* Allocate pages [@page_start,@page_end) into @pages for all units.
* The allocation is for @chunk. Percpu core doesn't care about the
* content of @pages and will pass it verbatim to pcpu_map_pages().
*/
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
- struct page **pages, int page_start, int page_end)
+ struct page **pages, int page_start, int page_end,
+ gfp_t gfp)
{
- const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
unsigned int cpu, tcpu;
int i;
+ gfp |= __GFP_HIGHMEM;
+
for_each_possible_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
* @chunk: chunk of interest
* @page_start: the start page
* @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
*
* For each cpu, populate and map pages [@page_start,@page_end) into
* @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
* pcpu_alloc_mutex, does GFP_KERNEL allocation.
*/
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
{
struct page **pages;
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
if (!pages)
return -ENOMEM;
- if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+ if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
return -ENOMEM;
if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
pcpu_free_pages(chunk, pages, page_start, page_end);
}
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
struct vm_struct **vms;
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
+#include <linux/sched.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
/**
* pcpu_mem_zalloc - allocate memory
* @size: bytes to allocate
+ * @gfp: allocation flags
*
* Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
*
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
{
if (WARN_ON_ONCE(!slab_is_available()))
return NULL;
if (size <= PAGE_SIZE)
- return kzalloc(size, GFP_KERNEL);
+ return kzalloc(size, gfp);
else
- return vzalloc(size);
+ return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
}
/**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
return chunk;
}
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
int region_bits;
- chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+ chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
if (!chunk)
return NULL;
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
region_bits = pcpu_chunk_map_bits(chunk);
chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
- sizeof(chunk->alloc_map[0]));
+ sizeof(chunk->alloc_map[0]), gfp);
if (!chunk->alloc_map)
goto alloc_map_fail;
chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
- sizeof(chunk->bound_map[0]));
+ sizeof(chunk->bound_map[0]), gfp);
if (!chunk->bound_map)
goto bound_map_fail;
chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
- sizeof(chunk->md_blocks[0]));
+ sizeof(chunk->md_blocks[0]), gfp);
if (!chunk->md_blocks)
goto md_blocks_fail;
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
* pcpu_addr_to_page - translate address to physical address
* pcpu_verify_alloc_info - check alloc_info is acceptable during init
*/
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
+ /* whitelisted flags that can be passed to the backing allocators */
+ gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
bool do_warn = !(gfp & __GFP_NOWARN);
static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}
- if (!is_atomic)
- mutex_lock(&pcpu_alloc_mutex);
+ if (!is_atomic) {
+ /*
+ * pcpu_balance_workfn() allocates memory under this mutex,
+ * and it may wait for memory reclaim. Allow current task
+ * to become OOM victim, in case of memory pressure.
+ */
+ if (gfp & __GFP_NOFAIL)
+ mutex_lock(&pcpu_alloc_mutex);
+ else if (mutex_lock_killable(&pcpu_alloc_mutex))
+ return NULL;
+ }
spin_lock_irqsave(&pcpu_lock, flags);
@@ -1421,7 +1434,7 @@ restart:
}
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(pcpu_gfp);
if (!chunk) {
err = "failed to allocate new chunk";
goto fail;
@@ -1450,7 +1463,7 @@ area_found:
page_start, page_end) {
WARN_ON(chunk->immutable);
- ret = pcpu_populate_chunk(chunk, rs, re);
+ ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
spin_lock_irqsave(&pcpu_lock, flags);
if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
* pcpu_balance_workfn - manage the amount of free chunks and populated pages
* @work: unused
*
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
*/
static void pcpu_balance_workfn(struct work_struct *work)
{
+ /* gfp flags passed to underlying allocators */
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
LIST_HEAD(to_free);
struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
spin_unlock_irq(&pcpu_lock);
}
pcpu_destroy_chunk(chunk);
+ cond_resched();
}
/*
@@ -1645,7 +1666,7 @@ retry_pop:
chunk->nr_pages) {
int nr = min(re - rs, nr_to_pop);
- ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+ ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
if (!ret) {
nr_to_pop -= nr;
spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
if (nr_to_pop) {
/* ran out of chunks to populate, create a new one and retry */
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(gfp);
if (chunk) {
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ next:
info = list_entry(pos, struct shmem_inode_info, shrinklist);
inode = &info->vfs_inode;
- if (nr_to_split && split >= nr_to_split) {
- iput(inode);
- continue;
- }
+ if (nr_to_split && split >= nr_to_split)
+ goto leave;
- page = find_lock_page(inode->i_mapping,
+ page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
if (!page)
goto drop;
+ /* No huge page at the end of the file: nothing to split */
if (!PageTransHuge(page)) {
- unlock_page(page);
put_page(page);
goto drop;
}
+ /*
+ * Leave the inode on the list if we failed to lock
+ * the page at this time.
+ *
+ * Waiting for the lock may lead to deadlock in the
+ * reclaim path.
+ */
+ if (!trylock_page(page)) {
+ put_page(page);
+ goto leave;
+ }
+
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- if (ret) {
- /* split failed: leave it on the list */
- iput(inode);
- continue;
- }
+ /* If split failed leave the inode on the list */
+ if (ret)
+ goto leave;
split++;
drop:
list_del_init(&info->shrinklist);
removed++;
+leave:
iput(inode);
}
diff --git a/mm/slab.c b/mm/slab.c
index 324446621b3e..9095c3945425 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1283,6 +1283,7 @@ void __init kmem_cache_init(void)
nr_node_ids * sizeof(struct kmem_cache_node *),
SLAB_HWCACHE_ALIGN, 0, 0);
list_add(&kmem_cache->list, &slab_caches);
+ memcg_link_cache(kmem_cache);
slab_state = PARTIAL;
/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bee53495a829..cd5dc3faaa57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1780,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
+ * If dirty pages are scanned that are not queued for IO, it
+ * implies that flushers are not doing their job. This can
+ * happen when memory pressure pushes dirty pages to the end of
+ * the LRU before the dirty limits are breached and the dirty
+ * data has expired. It can also happen when the proportion of
+ * dirty pages grows not through writes but through memory
+ * pressure reclaiming all the clean cache. And in some cases,
+ * the flushers simply cannot keep up with the allocation
+ * rate. Nudge the flusher threads in case they are asleep.
+ */
+ if (stat.nr_unqueued_dirty == nr_taken)
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+ /*
* Legacy memcg will stall in page writeback so avoid forcibly
* stalling here.
*/
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
set_bit(PGDAT_CONGESTED, &pgdat->flags);
- /*
- * If dirty pages are scanned that are not queued for IO, it
- * implies that flushers are not doing their job. This can
- * happen when memory pressure pushes dirty pages to the end of
- * the LRU before the dirty limits are breached and the dirty
- * data has expired. It can also happen when the proportion of
- * dirty pages grows not through writes but through memory
- * pressure reclaiming all the clean cache. And in some cases,
- * the flushers simply cannot keep up with the allocation
- * rate. Nudge the flusher threads in case they are asleep, but
- * also allow kswapd to start writing pages during reclaim.
- */
- if (stat.nr_unqueued_dirty == nr_taken) {
- wakeup_flusher_threads(WB_REASON_VMSCAN);
+ /* Allow kswapd to start writing pages during reclaim. */
+ if (stat.nr_unqueued_dirty == nr_taken)
set_bit(PGDAT_DIRTY, &pgdat->flags);
- }
/*
* If kswapd scans pages marked marked for immediate
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 40b2db6db6b1..33581be705f0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1839,9 +1839,11 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the
* update worker thread.
*/
+ preempt_disable();
queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
+ preempt_enable();
}
}