Diffstat (limited to 'mm')
-rw-r--r--  mm/compaction.c        29
-rw-r--r--  mm/debug.c              4
-rw-r--r--  mm/gup.c               48
-rw-r--r--  mm/huge_memory.c       36
-rw-r--r--  mm/hugetlb.c           13
-rw-r--r--  mm/kasan/kasan.h        5
-rw-r--r--  mm/kmemleak.c          18
-rw-r--r--  mm/memcontrol.c        20
-rw-r--r--  mm/memory.c            11
-rw-r--r--  mm/memory_hotplug.c    20
-rw-r--r--  mm/mempolicy.c         40
-rw-r--r--  mm/migrate.c           11
-rw-r--r--  mm/mmap.c               7
-rw-r--r--  mm/page_alloc.c        59
-rw-r--r--  mm/page_isolation.c    51
-rw-r--r--  mm/percpu.c             8
-rw-r--r--  mm/shmem.c             58
-rw-r--r--  mm/slab.c               6
-rw-r--r--  mm/slab.h               3
-rw-r--r--  mm/slab_common.c        2
-rw-r--r--  mm/slub.c               5
-rw-r--r--  mm/sparse.c             2
-rw-r--r--  mm/swapfile.c          32
-rw-r--r--  mm/util.c               2
-rw-r--r--  mm/vmscan.c            29
-rw-r--r--  mm/vmstat.c             5
26 files changed, 349 insertions, 175 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index f171a83707ce..3319e0872d01 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -242,6 +242,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
bool check_target)
{
struct page *page = pfn_to_online_page(pfn);
+ struct page *block_page;
struct page *end_page;
unsigned long block_pfn;
@@ -267,20 +268,26 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
return false;
+ /* Ensure the start of the pageblock or zone is online and valid */
+ block_pfn = pageblock_start_pfn(pfn);
+ block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+ if (block_page) {
+ page = block_page;
+ pfn = block_pfn;
+ }
+
+ /* Ensure the end of the pageblock or zone is online and valid */
+ block_pfn += pageblock_nr_pages;
+ block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
+ end_page = pfn_to_online_page(block_pfn);
+ if (!end_page)
+ return false;
+
/*
* Only clear the hint if a sample indicates there is either a
* free page or an LRU page in the block. One or other condition
* is necessary for the block to be a migration source/target.
*/
- block_pfn = pageblock_start_pfn(pfn);
- pfn = max(block_pfn, zone->zone_start_pfn);
- page = pfn_to_page(pfn);
- if (zone != page_zone(page))
- return false;
- pfn = block_pfn + pageblock_nr_pages;
- pfn = min(pfn, zone_end_pfn(zone));
- end_page = pfn_to_page(pfn);
-
do {
if (pfn_valid_within(pfn)) {
if (check_source && PageLRU(page)) {
@@ -309,7 +316,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
static void __reset_isolation_suitable(struct zone *zone)
{
unsigned long migrate_pfn = zone->zone_start_pfn;
- unsigned long free_pfn = zone_end_pfn(zone);
+ unsigned long free_pfn = zone_end_pfn(zone) - 1;
unsigned long reset_migrate = free_pfn;
unsigned long reset_free = migrate_pfn;
bool source_set = false;
@@ -1363,7 +1370,7 @@ fast_isolate_freepages(struct compact_control *cc)
count_compact_events(COMPACTISOLATED, nr_isolated);
} else {
/* If isolation fails, abort the search */
- order = -1;
+ order = cc->search_order + 1;
page = NULL;
}
}
diff --git a/mm/debug.c b/mm/debug.c
index c0b31b6c3877..eee9c221280c 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -79,7 +79,7 @@ void __dump_page(struct page *page, const char *reason)
pr_warn("ksm ");
else if (mapping) {
pr_warn("%ps ", mapping->a_ops);
- if (mapping->host->i_dentry.first) {
+ if (mapping->host && mapping->host->i_dentry.first) {
struct dentry *dentry;
dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
pr_warn("name:\"%pd\" ", dentry);
@@ -168,7 +168,7 @@ void dump_mm(const struct mm_struct *mm)
mm_pgtables_bytes(mm),
mm->map_count,
mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
- atomic64_read(&mm->pinned_vm),
+ (u64)atomic64_read(&mm->pinned_vm),
mm->data_vm, mm->exec_vm, mm->stack_vm,
mm->start_code, mm->end_code, mm->start_data, mm->end_data,
mm->start_brk, mm->brk, mm->start_stack,
diff --git a/mm/gup.c b/mm/gup.c
index f84e22685aaa..91819b8ad9cc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -160,8 +160,12 @@ retry:
goto retry;
}
- if (flags & FOLL_GET)
- get_page(page);
+ if (flags & FOLL_GET) {
+ if (unlikely(!try_get_page(page))) {
+ page = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ }
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
@@ -298,7 +302,10 @@ retry_locked:
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
- get_page(page);
+ if (unlikely(!try_get_page(page))) {
+ spin_unlock(ptl);
+ return ERR_PTR(-ENOMEM);
+ }
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
@@ -500,7 +507,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
if (is_device_public_page(*page))
goto unmap;
}
- get_page(*page);
+ if (unlikely(!try_get_page(*page))) {
+ ret = -ENOMEM;
+ goto unmap;
+ }
out:
ret = 0;
unmap:
@@ -1545,6 +1555,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
}
}
+/*
+ * Return the compound head page with ref appropriately incremented,
+ * or NULL if that failed.
+ */
+static inline struct page *try_get_compound_head(struct page *page, int refs)
+{
+ struct page *head = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(head) < 0))
+ return NULL;
+ if (unlikely(!page_cache_add_speculative(head, refs)))
+ return NULL;
+ return head;
+}
+
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
@@ -1579,9 +1603,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ head = try_get_compound_head(page, 1);
+ if (!head)
goto pte_unmap;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
@@ -1720,8 +1744,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pmd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pmd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
@@ -1758,8 +1782,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pud_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pud_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
@@ -1795,8 +1819,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pgd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pgd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
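
The mm/gup.c hunks above replace bare get_page()/page_cache_add_speculative() calls with try_get_page()/try_get_compound_head(), which refuse to take a reference once a page's refcount has become invalid (negative), so an overflowed count can no longer be nudged back to a plausible value. Below is a minimal userspace sketch of that check-before-increment pattern using C11 atomics; try_take_ref() and the plain counter are illustrative stand-ins, not kernel symbols.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for a page's reference count. */
static atomic_int refcount = 1;

/*
 * Refuse to take references when the count is already invalid (negative),
 * mirroring the WARN_ON_ONCE(page_ref_count(head) < 0) test in
 * try_get_compound_head() above.
 */
static bool try_take_ref(int refs)
{
        int old = atomic_load(&refcount);

        do {
                if (old < 0)            /* overflowed or already freed */
                        return false;
        } while (!atomic_compare_exchange_weak(&refcount, &old, old + refs));

        return true;
}

int main(void)
{
        printf("first attempt:  %d\n", try_take_ref(1));       /* 1: succeeds */
        atomic_store(&refcount, -1);                            /* simulate overflow */
        printf("second attempt: %d\n", try_take_ref(1));        /* 0: refused */
        return 0;
}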
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 404acdcd0455..165ea46bf149 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pmd_lock(mm, pmd);
+ if (!pmd_none(*pmd)) {
+ if (write) {
+ if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
+ goto out_unlock;
+ }
+ entry = pmd_mkyoung(*pmd);
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+ update_mmu_cache_pmd(vma, addr, pmd);
+ }
+
+ goto out_unlock;
+ }
+
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pmd_mkdevmap(entry);
@@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm_inc_nr_ptes(mm);
+ pgtable = NULL;
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
+
+out_unlock:
spin_unlock(ptl);
+ if (pgtable)
+ pte_free(mm, pgtable);
}
vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pud_lock(mm, pud);
+ if (!pud_none(*pud)) {
+ if (write) {
+ if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pud(*pud));
+ goto out_unlock;
+ }
+ entry = pud_mkyoung(*pud);
+ entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+ if (pudp_set_access_flags(vma, addr, pud, entry, 1))
+ update_mmu_cache_pud(vma, addr, pud);
+ }
+ goto out_unlock;
+ }
+
entry = pud_mkhuge(pfn_t_pud(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pud_mkdevmap(entry);
@@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
}
set_pud_at(mm, addr, pud, entry);
update_mmu_cache_pud(vma, addr, pud);
+
+out_unlock:
spin_unlock(ptl);
}
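
The insert_pfn_pmd()/insert_pfn_pud() hunks above make a re-fault on an already-populated huge entry a no-op (or a plain access-flag upgrade for writes) and free the preallocated page table when it was not deposited. The consume-or-free shape of that pgtable handling is sketched below as a hedged userspace analogue; install(), installed and the malloc()ed buffer are invented names for illustration only.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;
static void *installed;         /* stands in for the deposited page table */

/*
 * Preallocate outside the lock, consume it under the lock only if it is
 * still needed, and free the leftover after unlocking -- the same shape as
 * insert_pfn_pmd() setting pgtable = NULL once the table is deposited.
 */
static void install(void)
{
        void *pgtable = malloc(64);     /* may sleep; done before locking */

        pthread_mutex_lock(&ptl);
        if (!installed) {               /* cf. pmd_none(*pmd) */
                installed = pgtable;
                pgtable = NULL;         /* consumed, do not free below */
        }
        pthread_mutex_unlock(&ptl);

        free(pgtable);                  /* no-op when it was consumed */
}

int main(void)
{
        install();      /* first call deposits the table */
        install();      /* second call frees its unused preallocation */
        printf("installed=%p\n", installed);
        free(installed);
        return 0;
}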
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 97b1e0290c66..6cdc7b2d9100 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4299,6 +4299,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));
+
+ /*
+ * Instead of doing 'try_get_page()' below in the same_page
+ * loop, just check the count once here.
+ */
+ if (unlikely(page_count(page) <= 0)) {
+ if (pages) {
+ spin_unlock(ptl);
+ remainder = 0;
+ err = -ENOMEM;
+ break;
+ }
+ }
same_page:
if (pages) {
pages[i] = mem_map_offset(page, pfn_offset);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 3e0c11f7d7a1..3ce956efa0cb 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -163,7 +163,10 @@ static inline u8 random_tag(void)
#endif
#ifndef arch_kasan_set_tag
-#define arch_kasan_set_tag(addr, tag) ((void *)(addr))
+static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
+{
+ return addr;
+}
#endif
#ifndef arch_kasan_reset_tag
#define arch_kasan_reset_tag(addr) ((void *)(addr))
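
The kasan.h hunk turns the fallback arch_kasan_set_tag() from a macro that casts and silently drops its tag argument into a static inline, so every caller is type-checked even on architectures that do not implement tagging. A small, hedged illustration of why the inline fallback catches mistakes that the macro form lets through; set_tag_macro() and set_tag_inline() are made-up names for the example.

#include <stdio.h>

/* Macro fallback: no type checking, the unused tag is silently accepted. */
#define set_tag_macro(addr, tag) ((void *)(addr))

/* Inline fallback: callers must pass a pointer and an integral tag. */
static inline const void *set_tag_inline(const void *addr, unsigned char tag)
{
        (void)tag;              /* the generic stub ignores the tag */
        return addr;
}

int main(void)
{
        int x;

        /* Compiles even with a nonsense "tag"; the mistake only surfaces on
         * architectures that provide a real implementation. */
        void *a = set_tag_macro(&x, "oops");

        /* Type-checked everywhere, whether or not tagging is implemented. */
        const void *b = set_tag_inline(&x, 0xfe);

        printf("%p %p\n", a, (void *)b);
        return 0;
}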
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 707fa5579f66..2e435b8142e5 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1401,6 +1401,7 @@ static void scan_block(void *_start, void *_end,
/*
* Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency.
*/
+#ifdef CONFIG_SMP
static void scan_large_block(void *start, void *end)
{
void *next;
@@ -1412,6 +1413,7 @@ static void scan_large_block(void *start, void *end)
cond_resched();
}
}
+#endif
/*
* Scan a memory block corresponding to a kmemleak_object. A condition is
@@ -1529,11 +1531,6 @@ static void kmemleak_scan(void)
}
rcu_read_unlock();
- /* data/bss scanning */
- scan_large_block(_sdata, _edata);
- scan_large_block(__bss_start, __bss_stop);
- scan_large_block(__start_ro_after_init, __end_ro_after_init);
-
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
for_each_possible_cpu(i)
@@ -2071,6 +2068,17 @@ void __init kmemleak_init(void)
}
local_irq_restore(flags);
+ /* register the data/bss sections */
+ create_object((unsigned long)_sdata, _edata - _sdata,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ create_object((unsigned long)__bss_start, __bss_stop - __bss_start,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+ /* only register .data..ro_after_init if not within .data */
+ if (__start_ro_after_init < _sdata || __end_ro_after_init > _edata)
+ create_object((unsigned long)__start_ro_after_init,
+ __end_ro_after_init - __start_ro_after_init,
+ KMEMLEAK_GREY, GFP_ATOMIC);
+
/*
* This is the point where tracking allocations is safe. Automatic
* scanning is started during the late initcall. Add the early logged
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 532e0e2a4817..81a0d3914ec9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
return &memcg->cgwb_domain;
}
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page().
+ */
+static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = atomic_long_read(&memcg->stat[idx]);
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+ if (x < 0)
+ x = 0;
+ return x;
+}
+
/**
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
* @wb: bdi_writeback in question
@@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
struct mem_cgroup *parent;
- *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+ *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
/* this should eventually include NR_UNSTABLE_NFS */
- *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+ *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
(1 << LRU_ACTIVE_FILE));
*pheadroom = PAGE_COUNTER_MAX;
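
memcg_exact_page_state() above folds the not-yet-flushed per-cpu deltas into the batched atomic counter so writeback sees an exact dirty/writeback count rather than one that may lag by the per-cpu batch size. Below is a hedged userspace sketch of that "batched global plus per-CPU residue" read; struct counter, approx_read() and exact_read() are invented for the example and are not the memcg data structures.

#include <stdio.h>

#define NCPUS 4

/* Global counter updated in batches, plus small per-CPU residue. */
struct counter {
        long global;            /* flushed, possibly stale */
        long percpu[NCPUS];     /* unflushed deltas, may be negative */
};

/* Cheap read: just the flushed value (analogue of memcg_page_state()). */
static long approx_read(const struct counter *c)
{
        return c->global;
}

/* Exact read: add every CPU's residue and clamp below at zero
 * (analogue of memcg_exact_page_state()). */
static long exact_read(const struct counter *c)
{
        long x = c->global;

        for (int cpu = 0; cpu < NCPUS; cpu++)
                x += c->percpu[cpu];
        return x < 0 ? 0 : x;
}

int main(void)
{
        struct counter dirty = { .global = 64, .percpu = { 3, -2, 0, 5 } };

        printf("approx=%ld exact=%ld\n", approx_read(&dirty), exact_read(&dirty));
        return 0;
}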
diff --git a/mm/memory.c b/mm/memory.c
index 47fe250307c7..ab650c21bccd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1549,10 +1549,12 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte)));
goto out_unlock;
}
- entry = *pte;
- goto out_mkwrite;
- } else
- goto out_unlock;
+ entry = pte_mkyoung(*pte);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ if (ptep_set_access_flags(vma, addr, pte, entry, 1))
+ update_mmu_cache(vma, addr, pte);
+ }
+ goto out_unlock;
}
/* Ok, finally just insert the thing.. */
@@ -1561,7 +1563,6 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
else
entry = pte_mkspecial(pfn_t_pte(pfn, prot));
-out_mkwrite:
if (mkwrite) {
entry = pte_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f767582af4f8..b236069ff0d8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -874,6 +874,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
*/
mem = find_memory_block(__pfn_to_section(pfn));
nid = mem->nid;
+ put_device(&mem->dev);
/* associate pfn range with the zone */
zone = move_pfn_range(online_type, nid, pfn, nr_pages);
@@ -1576,7 +1577,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
{
unsigned long pfn, nr_pages;
long offlined_pages;
- int ret, node;
+ int ret, node, nr_isolate_pageblock;
unsigned long flags;
unsigned long valid_start, valid_end;
struct zone *zone;
@@ -1602,10 +1603,11 @@ static int __ref __offline_pages(unsigned long start_pfn,
ret = start_isolate_page_range(start_pfn, end_pfn,
MIGRATE_MOVABLE,
SKIP_HWPOISON | REPORT_FAILURE);
- if (ret) {
+ if (ret < 0) {
reason = "failure to isolate range";
goto failed_removal;
}
+ nr_isolate_pageblock = ret;
arg.start_pfn = start_pfn;
arg.nr_pages = nr_pages;
@@ -1657,8 +1659,16 @@ static int __ref __offline_pages(unsigned long start_pfn,
/* Ok, all of our target is isolated.
We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
- /* reset pagetype flags and makes migrate type to be MOVABLE */
- undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+
+ /*
+ * Onlining will reset pagetype flags and makes migrate type
+ * MOVABLE, so just need to decrease the number of isolated
+ * pageblocks zone counter here.
+ */
+ spin_lock_irqsave(&zone->lock, flags);
+ zone->nr_isolate_pageblock -= nr_isolate_pageblock;
+ spin_unlock_irqrestore(&zone->lock, flags);
+
/* removal success */
adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
zone->present_pages -= offlined_pages;
@@ -1690,12 +1700,12 @@ static int __ref __offline_pages(unsigned long start_pfn,
failed_removal_isolated:
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+ memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal:
pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
- memory_notify(MEM_CANCEL_OFFLINE, &arg);
/* pushback to free area */
mem_hotplug_done();
return ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index af171ccb56a2..2219e747df49 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -428,6 +428,13 @@ static inline bool queue_pages_required(struct page *page,
return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}
+/*
+ * queue_pages_pmd() has three possible return values:
+ * 1 - pages are placed on the right node or queued successfully.
+ * 0 - THP was split.
+ * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
+ * page was already on a node that does not follow the policy.
+ */
static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
@@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long flags;
if (unlikely(is_pmd_migration_entry(*pmd))) {
- ret = 1;
+ ret = -EIO;
goto unlock;
}
page = pmd_page(*pmd);
@@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
ret = 1;
flags = qp->flags;
/* go to thp migration */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(walk->vma)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
migrate_page_add(page, qp->pagelist, flags);
+ } else
+ ret = -EIO;
unlock:
spin_unlock(ptl);
out:
@@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
- if (ret)
+ if (ret > 0)
return 0;
+ else if (ret < 0)
+ return ret;
}
if (pmd_trans_unstable(pmd))
@@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
continue;
if (!queue_pages_required(page, qp))
continue;
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(vma))
+ break;
+ migrate_page_add(page, qp->pagelist, flags);
+ } else
+ break;
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
- return 0;
+ return addr != end ? -EIO : 0;
}
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;
- if (!vma_migratable(vma))
+ /*
+ * Need check MPOL_MF_STRICT to return -EIO if possible
+ * regardless of vma_migratable
+ */
+ if (!vma_migratable(vma) &&
+ !(flags & MPOL_MF_STRICT))
return 1;
if (endvma > end)
@@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
}
/* queue pages from current vma */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & MPOL_MF_VALID)
return 0;
return 1;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index ac6f4939bb59..663a5449367a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -248,10 +248,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
pte = swp_entry_to_pte(entry);
} else if (is_device_public_page(new)) {
pte = pte_mkdevmap(pte);
- flush_dcache_page(new);
}
- } else
- flush_dcache_page(new);
+ }
#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new)) {
@@ -995,6 +993,13 @@ static int move_to_new_page(struct page *newpage, struct page *page,
*/
if (!PageMappingFlags(page))
page->mapping = NULL;
+
+ if (unlikely(is_zone_device_page(newpage))) {
+ if (is_device_public_page(newpage))
+ flush_dcache_page(newpage);
+ } else
+ flush_dcache_page(newpage);
+
}
out:
return rc;
diff --git a/mm/mmap.c b/mm/mmap.c
index 41eb48d9b527..bd7b9f293b39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -45,6 +45,7 @@
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
+#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
@@ -2525,7 +2526,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
vma = find_vma_prev(mm, addr, &prev);
if (vma && (vma->vm_start <= addr))
return vma;
- if (!prev || expand_stack(prev, addr))
+ /* don't alter vm_end if the coredump is running */
+ if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED)
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2551,6 +2553,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
return vma;
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
+ /* don't alter vm_start if the coredump is running */
+ if (!mmget_still_valid(mm))
+ return NULL;
start = vma->vm_start;
if (expand_stack(vma, addr))
return NULL;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 03fcf73d47da..c02cff1ed56e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = {
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
+ * are not on separate NUMA nodes. Functionally this works but with
+ * watermark_boost_factor, it can reclaim prematurely as the ranges can be
+ * quite small. By default, do not boost watermarks on discontigmem as in
+ * many cases very high-order allocations like THP are likely to be
+ * unsupported and the premature reclaim offsets the advantage of long-term
+ * fragmentation avoidance.
+ */
+int watermark_boost_factor __read_mostly;
+#else
int watermark_boost_factor __read_mostly = 15000;
+#endif
int watermark_scale_factor = 10;
static unsigned long nr_kernel_pages __initdata;
@@ -3419,8 +3432,11 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
alloc_flags |= ALLOC_KSWAPD;
#ifdef CONFIG_ZONE_DMA32
+ if (!zone)
+ return alloc_flags;
+
if (zone_idx(zone) != ZONE_NORMAL)
- goto out;
+ return alloc_flags;
/*
* If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
@@ -3429,9 +3445,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
*/
BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
if (nr_online_nodes > 1 && !populated_zone(--zone))
- goto out;
+ return alloc_flags;
-out:
+ alloc_flags |= ALLOC_NOFRAGMENT;
#endif /* CONFIG_ZONE_DMA32 */
return alloc_flags;
}
@@ -3773,11 +3789,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
- if (*compact_result <= COMPACT_INACTIVE) {
- WARN_ON_ONCE(page);
- return NULL;
- }
-
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
* count a compaction stall
@@ -8005,7 +8016,10 @@ void *__init alloc_large_system_hash(const char *tablename,
bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
int migratetype, int flags)
{
- unsigned long pfn, iter, found;
+ unsigned long found;
+ unsigned long iter = 0;
+ unsigned long pfn = page_to_pfn(page);
+ const char *reason = "unmovable page";
/*
* TODO we could make this much more efficient by not checking every
@@ -8015,17 +8029,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
* can still lead to having bootmem allocations in zone_movable.
*/
- /*
- * CMA allocations (alloc_contig_range) really need to mark isolate
- * CMA pageblocks even when they are not movable in fact so consider
- * them movable here.
- */
- if (is_migrate_cma(migratetype) &&
- is_migrate_cma(get_pageblock_migratetype(page)))
- return false;
+ if (is_migrate_cma_page(page)) {
+ /*
+ * CMA allocations (alloc_contig_range) really need to mark
+ * isolate CMA pageblocks even when they are not movable in fact
+ * so consider them movable here.
+ */
+ if (is_migrate_cma(migratetype))
+ return false;
- pfn = page_to_pfn(page);
- for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+ reason = "CMA page";
+ goto unmovable;
+ }
+
+ for (found = 0; iter < pageblock_nr_pages; iter++) {
unsigned long check = pfn + iter;
if (!pfn_valid_within(check))
@@ -8105,7 +8122,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
unmovable:
WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
if (flags & REPORT_FAILURE)
- dump_page(pfn_to_page(pfn+iter), "unmovable page");
+ dump_page(pfn_to_page(pfn + iter), reason);
return true;
}
@@ -8233,7 +8250,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
ret = start_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype, 0);
- if (ret)
+ if (ret < 0)
return ret;
/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index ce323e56b34d..019280712e1b 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -59,7 +59,8 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
* FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
* We just check MOVABLE pages.
*/
- if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags))
+ if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype,
+ isol_flags))
ret = 0;
/*
@@ -160,27 +161,36 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
return NULL;
}
-/*
- * start_isolate_page_range() -- make page-allocation-type of range of pages
- * to be MIGRATE_ISOLATE.
- * @start_pfn: The lower PFN of the range to be isolated.
- * @end_pfn: The upper PFN of the range to be isolated.
- * @migratetype: migrate type to set in error recovery.
+/**
+ * start_isolate_page_range() - make page-allocation-type of range of pages to
+ * be MIGRATE_ISOLATE.
+ * @start_pfn: The lower PFN of the range to be isolated.
+ * @end_pfn: The upper PFN of the range to be isolated.
+ * start_pfn/end_pfn must be aligned to pageblock_order.
+ * @migratetype: Migrate type to set in error recovery.
+ * @flags: The following flags are allowed (they can be combined in
+ * a bit mask)
+ * SKIP_HWPOISON - ignore hwpoison pages
+ * REPORT_FAILURE - report details about the failure to
+ * isolate the range
*
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
* the range will never be allocated. Any free pages and pages freed in the
- * future will not be allocated again.
- *
- * start_pfn/end_pfn must be aligned to pageblock_order.
- * Return 0 on success and -EBUSY if any part of range cannot be isolated.
+ * future will not be allocated again. If specified range includes migrate types
+ * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
+ * pages in the range finally, the caller have to free all pages in the range.
+ * test_page_isolated() can be used for test it.
*
* There is no high level synchronization mechanism that prevents two threads
- * from trying to isolate overlapping ranges. If this happens, one thread
+ * from trying to isolate overlapping ranges. If this happens, one thread
* will notice pageblocks in the overlapping range already set to isolate.
* This happens in set_migratetype_isolate, and set_migratetype_isolate
- * returns an error. We then clean up by restoring the migration type on
- * pageblocks we may have modified and return -EBUSY to caller. This
+ * returns an error. We then clean up by restoring the migration type on
+ * pageblocks we may have modified and return -EBUSY to caller. This
* prevents two threads from simultaneously working on overlapping ranges.
+ *
+ * Return: the number of isolated pageblocks on success and -EBUSY if any part
+ * of range cannot be isolated.
*/
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned migratetype, int flags)
@@ -188,6 +198,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
unsigned long pfn;
unsigned long undo_pfn;
struct page *page;
+ int nr_isolate_pageblock = 0;
BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
@@ -196,13 +207,15 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
pfn < end_pfn;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
- if (page &&
- set_migratetype_isolate(page, migratetype, flags)) {
- undo_pfn = pfn;
- goto undo;
+ if (page) {
+ if (set_migratetype_isolate(page, migratetype, flags)) {
+ undo_pfn = pfn;
+ goto undo;
+ }
+ nr_isolate_pageblock++;
}
}
- return 0;
+ return nr_isolate_pageblock;
undo:
for (pfn = start_pfn;
pfn < undo_pfn;
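
With this patch start_isolate_page_range() reports how many pageblocks it switched to MIGRATE_ISOLATE (or a negative errno), so callers such as __offline_pages() can later decrement zone->nr_isolate_pageblock without re-deriving the count. A minimal sketch of that calling convention follows; isolate_range() is an invented stand-in that merely follows the same contract.

#include <stdio.h>

/*
 * Stand-in with the same contract as start_isolate_page_range() after this
 * patch: < 0 on failure, otherwise the number of pageblocks isolated.
 */
static int isolate_range(unsigned long start_pfn, unsigned long end_pfn)
{
        return (int)((end_pfn - start_pfn) / 512);      /* pretend pageblock = 512 pages */
}

int main(void)
{
        long nr_isolate_pageblock;
        int ret = isolate_range(0x10000, 0x14000);

        if (ret < 0) {
                fprintf(stderr, "failure to isolate range\n");
                return 1;       /* cf. propagating -EBUSY */
        }
        /* Remember how many blocks were isolated so the zone counter can be
         * decremented later without undoing the isolation (cf. __offline_pages()). */
        nr_isolate_pageblock = ret;
        printf("isolated %ld pageblocks\n", nr_isolate_pageblock);
        return 0;
}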
diff --git a/mm/percpu.c b/mm/percpu.c
index 2e6fc8d552c9..68dd2e7e73b5 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2567,8 +2567,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
ai->groups[group].base_offset = areas[group] - base;
}
- pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
- PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
+ pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n",
+ PFN_DOWN(size_sum), ai->static_size, ai->reserved_size,
ai->dyn_size, ai->unit_size);
rc = pcpu_setup_first_chunk(ai, base);
@@ -2692,8 +2692,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
}
/* we're ready, commit */
- pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n",
- unit_pages, psize_str, vm.addr, ai->static_size,
+ pr_info("%d %s pages/cpu s%zu r%zu d%zu\n",
+ unit_pages, psize_str, ai->static_size,
ai->reserved_size, ai->dyn_size);
rc = pcpu_setup_first_chunk(ai, vm.addr);
diff --git a/mm/shmem.c b/mm/shmem.c
index b3db3779a30a..2275a0ff7c30 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode)
}
spin_unlock(&sbinfo->shrinklist_lock);
}
- if (!list_empty(&info->swaplist)) {
+ while (!list_empty(&info->swaplist)) {
+ /* Wait while shmem_unuse() is scanning this inode... */
+ wait_var_event(&info->stop_eviction,
+ !atomic_read(&info->stop_eviction));
mutex_lock(&shmem_swaplist_mutex);
- list_del_init(&info->swaplist);
+ /* ...but beware of the race if we peeked too early */
+ if (!atomic_read(&info->stop_eviction))
+ list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
}
@@ -1099,10 +1104,11 @@ extern struct swap_info_struct *swap_info[];
static int shmem_find_swap_entries(struct address_space *mapping,
pgoff_t start, unsigned int nr_entries,
struct page **entries, pgoff_t *indices,
- bool frontswap)
+ unsigned int type, bool frontswap)
{
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
+ swp_entry_t entry;
unsigned int ret = 0;
if (!nr_entries)
@@ -1116,13 +1122,12 @@ static int shmem_find_swap_entries(struct address_space *mapping,
if (!xa_is_value(page))
continue;
- if (frontswap) {
- swp_entry_t entry = radix_to_swp_entry(page);
-
- if (!frontswap_test(swap_info[swp_type(entry)],
- swp_offset(entry)))
- continue;
- }
+ entry = radix_to_swp_entry(page);
+ if (swp_type(entry) != type)
+ continue;
+ if (frontswap &&
+ !frontswap_test(swap_info[type], swp_offset(entry)))
+ continue;
indices[ret] = xas.xa_index;
entries[ret] = page;
@@ -1194,7 +1199,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
pvec.pages, indices,
- frontswap);
+ type, frontswap);
if (pvec.nr == 0) {
ret = 0;
break;
@@ -1227,36 +1232,27 @@ int shmem_unuse(unsigned int type, bool frontswap,
unsigned long *fs_pages_to_unuse)
{
struct shmem_inode_info *info, *next;
- struct inode *inode;
- struct inode *prev_inode = NULL;
int error = 0;
if (list_empty(&shmem_swaplist))
return 0;
mutex_lock(&shmem_swaplist_mutex);
-
- /*
- * The extra refcount on the inode is necessary to safely dereference
- * p->next after re-acquiring the lock. New shmem inodes with swap
- * get added to the end of the list and we will scan them all.
- */
list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
if (!info->swapped) {
list_del_init(&info->swaplist);
continue;
}
-
- inode = igrab(&info->vfs_inode);
- if (!inode)
- continue;
-
+ /*
+ * Drop the swaplist mutex while searching the inode for swap;
+ * but before doing so, make sure shmem_evict_inode() will not
+ * remove placeholder inode from swaplist, nor let it be freed
+ * (igrab() would protect from unlink, but not from unmount).
+ */
+ atomic_inc(&info->stop_eviction);
mutex_unlock(&shmem_swaplist_mutex);
- if (prev_inode)
- iput(prev_inode);
- prev_inode = inode;
- error = shmem_unuse_inode(inode, type, frontswap,
+ error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
fs_pages_to_unuse);
cond_resched();
@@ -1264,14 +1260,13 @@ int shmem_unuse(unsigned int type, bool frontswap,
next = list_next_entry(info, swaplist);
if (!info->swapped)
list_del_init(&info->swaplist);
+ if (atomic_dec_and_test(&info->stop_eviction))
+ wake_up_var(&info->stop_eviction);
if (error)
break;
}
mutex_unlock(&shmem_swaplist_mutex);
- if (prev_inode)
- iput(prev_inode);
-
return error;
}
@@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
+ atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->shrinklist);
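
The shmem.c hunks drop the igrab()/iput() pinning in shmem_unuse() in favour of an info->stop_eviction count: the scanner bumps it before dropping the swaplist mutex, and eviction sleeps in wait_var_event() until it returns to zero. The sketch below imitates that handshake in userspace, with C11 atomics plus a pthread condition variable standing in for atomic_inc()/atomic_dec_and_test() and wait_var_event()/wake_up_var(); scanner(), evictor() and this stop_eviction are illustrative only.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int stop_eviction;        /* scanners currently inside the inode */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void *scanner(void *arg)
{
        (void)arg;
        atomic_fetch_add(&stop_eviction, 1);    /* cf. atomic_inc(&info->stop_eviction) */
        usleep(1000);                           /* pretend to scan swap entries */
        if (atomic_fetch_sub(&stop_eviction, 1) == 1) {  /* cf. atomic_dec_and_test() */
                pthread_mutex_lock(&lock);
                pthread_cond_broadcast(&cond);  /* cf. wake_up_var() */
                pthread_mutex_unlock(&lock);
        }
        return NULL;
}

static void evictor(void)
{
        pthread_mutex_lock(&lock);
        while (atomic_load(&stop_eviction))     /* cf. wait_var_event(..., !atomic_read(...)) */
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        printf("safe to evict\n");
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, scanner, NULL);
        evictor();
        pthread_join(&t, NULL);
        return 0;
}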
diff --git a/mm/slab.c b/mm/slab.c
index 28652e4218e0..9142ee992493 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2115,6 +2115,8 @@ done:
cachep->allocflags = __GFP_COMP;
if (flags & SLAB_CACHE_DMA)
cachep->allocflags |= GFP_DMA;
+ if (flags & SLAB_CACHE_DMA32)
+ cachep->allocflags |= GFP_DMA32;
if (flags & SLAB_RECLAIM_ACCOUNT)
cachep->allocflags |= __GFP_RECLAIMABLE;
cachep->size = size;
@@ -2372,7 +2374,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
/* Slab management obj is off-slab. */
freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
- freelist = kasan_reset_tag(freelist);
if (!freelist)
return NULL;
} else {
@@ -4306,7 +4307,8 @@ static void show_symbol(struct seq_file *m, unsigned long address)
static int leaks_show(struct seq_file *m, void *p)
{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
+ struct kmem_cache *cachep = list_entry(p, struct kmem_cache,
+ root_caches_node);
struct page *page;
struct kmem_cache_node *n;
const char *name;
diff --git a/mm/slab.h b/mm/slab.h
index e5e6658eeacc..43ac818b8592 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
/* Legal flag mask for kmem_cache_create(), for various configurations */
-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
+ SLAB_CACHE_DMA32 | SLAB_PANIC | \
SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
#if defined(CONFIG_DEBUG_SLAB)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 03eeb8b7b4b1..58251ba63e4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
SLAB_FAILSLAB | SLAB_KASAN)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
- SLAB_ACCOUNT)
+ SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
/*
* Merge control. If this is set then no merging of slab caches will occur.
diff --git a/mm/slub.c b/mm/slub.c
index 1b08fbcb7e61..d30ede89f4a6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3589,6 +3589,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= GFP_DMA;
+ if (s->flags & SLAB_CACHE_DMA32)
+ s->allocflags |= GFP_DMA32;
+
if (s->flags & SLAB_RECLAIM_ACCOUNT)
s->allocflags |= __GFP_RECLAIMABLE;
@@ -5679,6 +5682,8 @@ static char *create_unique_id(struct kmem_cache *s)
*/
if (s->flags & SLAB_CACHE_DMA)
*p++ = 'd';
+ if (s->flags & SLAB_CACHE_DMA32)
+ *p++ = 'D';
if (s->flags & SLAB_RECLAIM_ACCOUNT)
*p++ = 'a';
if (s->flags & SLAB_CONSISTENCY_CHECKS)
diff --git a/mm/sparse.c b/mm/sparse.c
index 69904aa6165b..56e057c432f9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -567,7 +567,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-/* Mark all memory sections within the pfn range as online */
+/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2b8d9c3fbb47..cf63b5f01adf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2023,7 +2023,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
* If the boolean frontswap is true, only unuse pages_to_unuse pages;
* pages_to_unuse==0 means all pages; ignored if frontswap is false
*/
-#define SWAP_UNUSE_MAX_TRIES 3
int try_to_unuse(unsigned int type, bool frontswap,
unsigned long pages_to_unuse)
{
@@ -2035,7 +2034,6 @@ int try_to_unuse(unsigned int type, bool frontswap,
struct page *page;
swp_entry_t entry;
unsigned int i;
- int retries = 0;
if (!si->inuse_pages)
return 0;
@@ -2053,11 +2051,9 @@ retry:
spin_lock(&mmlist_lock);
p = &init_mm.mmlist;
- while ((p = p->next) != &init_mm.mmlist) {
- if (signal_pending(current)) {
- retval = -EINTR;
- break;
- }
+ while (si->inuse_pages &&
+ !signal_pending(current) &&
+ (p = p->next) != &init_mm.mmlist) {
mm = list_entry(p, struct mm_struct, mmlist);
if (!mmget_not_zero(mm))
@@ -2084,7 +2080,9 @@ retry:
mmput(prev_mm);
i = 0;
- while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
+ while (si->inuse_pages &&
+ !signal_pending(current) &&
+ (i = find_next_to_unuse(si, i, frontswap)) != 0) {
entry = swp_entry(type, i);
page = find_get_page(swap_address_space(entry), i);
@@ -2117,14 +2115,18 @@ retry:
* If yes, we would need to do retry the unuse logic again.
* Under global memory pressure, swap entries can be reinserted back
* into process space after the mmlist loop above passes over them.
- * Its not worth continuosuly retrying to unuse the swap in this case.
- * So we try SWAP_UNUSE_MAX_TRIES times.
+ *
+ * Limit the number of retries? No: when mmget_not_zero() above fails,
+ * that mm is likely to be freeing swap from exit_mmap(), which proceeds
+ * at its own independent pace; and even shmem_writepage() could have
+ * been preempted after get_swap_page(), temporarily hiding that swap.
+ * It's easy and robust (though cpu-intensive) just to keep retrying.
*/
- if (++retries >= SWAP_UNUSE_MAX_TRIES)
- retval = -EBUSY;
- else if (si->inuse_pages)
- goto retry;
-
+ if (si->inuse_pages) {
+ if (!signal_pending(current))
+ goto retry;
+ retval = -EINTR;
+ }
out:
return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
}
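
try_to_unuse() now keeps retrying until either si->inuse_pages drains to zero or a signal is pending, instead of giving up with -EBUSY after SWAP_UNUSE_MAX_TRIES passes. Below is a small, hedged userspace analogue of that "retry until done or interrupted" loop, with a SIGINT flag standing in for signal_pending(current); inuse_pages and drain_one() are invented for the example.

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t interrupted;

static void on_sigint(int sig)
{
        (void)sig;
        interrupted = 1;
}

/* Pretend "work left" counter, standing in for si->inuse_pages. */
static int inuse_pages = 3;

static int drain_one(void)
{
        sleep(1);               /* each pass may take a while */
        return --inuse_pages;
}

int main(void)
{
        signal(SIGINT, on_sigint);

        /* Keep retrying; only a pending signal ends the loop early. */
        while (inuse_pages && !interrupted)
                drain_one();

        if (inuse_pages) {
                fprintf(stderr, "interrupted with %d pages in use\n", inuse_pages);
                return 1;       /* cf. returning -EINTR */
        }
        printf("all swap entries unused\n");
        return 0;
}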
diff --git a/mm/util.c b/mm/util.c
index d559bde497a9..43a2984bccaa 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -204,7 +204,7 @@ EXPORT_SYMBOL(vmemdup_user);
* @s: The string to duplicate
* @n: Maximum number of bytes to copy, including the trailing NUL.
*
- * Return: newly allocated copy of @s or %NULL in case of error
+ * Return: newly allocated copy of @s or an ERR_PTR() in case of error
*/
char *strndup_user(const char __user *s, long n)
{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a5ad0b35ab8e..a815f73ee4d5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2176,7 +2176,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct mem_cgroup *memcg,
struct scan_control *sc, bool actual_reclaim)
{
enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
@@ -2197,16 +2196,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
/*
* When refaults are being observed, it means a new workingset
* is being established. Disable active list protection to get
* rid of the stale workingset quickly.
*/
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
if (file && actual_reclaim && lruvec->refaults != refaults) {
inactive_ratio = 0;
} else {
@@ -2227,12 +2222,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct lruvec *lruvec, struct mem_cgroup *memcg,
- struct scan_control *sc)
+ struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru),
- memcg, sc, true))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
@@ -2332,7 +2325,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* anonymous pages on the LRU in eligible zones.
* Otherwise, the small LRU gets thrashed.
*/
- if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, false, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
>> sc->priority) {
scan_balance = SCAN_ANON;
@@ -2350,7 +2343,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, true, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
@@ -2503,7 +2496,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- lruvec, memcg, sc);
+ lruvec, sc);
}
}
@@ -2570,7 +2563,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
@@ -2969,12 +2962,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
unsigned long refaults;
struct lruvec *lruvec;
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
lruvec->refaults = refaults;
} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
}
@@ -3339,7 +3328,7 @@ static void age_active_anon(struct pglist_data *pgdat,
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 36b56f858f0f..a7d493366a65 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1274,13 +1274,8 @@ const char * const vmstat_text[] = {
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
"nr_tlb_remote_flush",
"nr_tlb_remote_flush_received",
-#else
- "", /* nr_tlb_remote_flush */
- "", /* nr_tlb_remote_flush_received */
-#endif /* CONFIG_SMP */
"nr_tlb_local_flush_all",
"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */