summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/gup.c2
-rw-r--r--mm/huge_memory.c99
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/internal.h7
-rw-r--r--mm/kasan/kasan.h5
-rw-r--r--mm/kasan/report.c36
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/memory_hotplug.c6
-rw-r--r--mm/mempolicy.c20
-rw-r--r--mm/migrate.c9
-rw-r--r--mm/page_alloc.c54
-rw-r--r--mm/page_vma_mapped.c15
-rw-r--r--mm/percpu-vm.c7
-rw-r--r--mm/percpu.c5
-rw-r--r--mm/rmap.c4
-rw-r--r--mm/swap.c27
-rw-r--r--mm/swap_cgroup.c2
-rw-r--r--mm/swap_slots.c2
-rw-r--r--mm/vmalloc.c3
-rw-r--r--mm/vmstat.c18
-rw-r--r--mm/workingset.c2
-rw-r--r--mm/z3fold.c10
-rw-r--r--mm/zsmalloc.c2
23 files changed, 213 insertions, 134 deletions
diff --git a/mm/gup.c b/mm/gup.c
index c74bad1bf6e8..04aa405350dc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1455,7 +1455,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
P4D_SHIFT, next, write, pages, nr))
return 0;
- } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr))
+ } else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
return 0;
} while (p4dp++, addr = next, addr != end);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1ebc93e179f3..f3c4f9d22821 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj,
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- } else if (!memcmp("defer", buf,
- min(sizeof("defer")-1, count))) {
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
- clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
- set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
} else if (!memcmp("defer+madvise", buf,
min(sizeof("defer+madvise")-1, count))) {
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ } else if (!memcmp("defer", buf,
+ min(sizeof("defer")-1, count))) {
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+ clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+ set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
} else if (!memcmp("madvise", buf,
min(sizeof("madvise")-1, count))) {
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
deactivate_page(page);
if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
- orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
- tlb->fullmm);
+ pmdp_invalidate(vma, addr, pmd);
orig_pmd = pmd_mkold(orig_pmd);
orig_pmd = pmd_mkclean(orig_pmd);
@@ -1724,37 +1723,69 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
- int ret = 0;
+ pmd_t entry;
+ bool preserve_write;
+ int ret;
ptl = __pmd_trans_huge_lock(pmd, vma);
- if (ptl) {
- pmd_t entry;
- bool preserve_write = prot_numa && pmd_write(*pmd);
- ret = 1;
+ if (!ptl)
+ return 0;
- /*
- * Avoid trapping faults against the zero page. The read-only
- * data is likely to be read-cached on the local CPU and
- * local/remote hits to the zero page are not interesting.
- */
- if (prot_numa && is_huge_zero_pmd(*pmd)) {
- spin_unlock(ptl);
- return ret;
- }
+ preserve_write = prot_numa && pmd_write(*pmd);
+ ret = 1;
- if (!prot_numa || !pmd_protnone(*pmd)) {
- entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
- entry = pmd_modify(entry, newprot);
- if (preserve_write)
- entry = pmd_mk_savedwrite(entry);
- ret = HPAGE_PMD_NR;
- set_pmd_at(mm, addr, pmd, entry);
- BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
- pmd_write(entry));
- }
- spin_unlock(ptl);
- }
+ /*
+ * Avoid trapping faults against the zero page. The read-only
+ * data is likely to be read-cached on the local CPU and
+ * local/remote hits to the zero page are not interesting.
+ */
+ if (prot_numa && is_huge_zero_pmd(*pmd))
+ goto unlock;
+
+ if (prot_numa && pmd_protnone(*pmd))
+ goto unlock;
+
+ /*
+ * In case prot_numa, we are under down_read(mmap_sem). It's critical
+ * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+ * which is also under down_read(mmap_sem):
+ *
+ * CPU0: CPU1:
+ * change_huge_pmd(prot_numa=1)
+ * pmdp_huge_get_and_clear_notify()
+ * madvise_dontneed()
+ * zap_pmd_range()
+ * pmd_trans_huge(*pmd) == 0 (without ptl)
+ * // skip the pmd
+ * set_pmd_at();
+ * // pmd is re-established
+ *
+ * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+ * which may break userspace.
+ *
+ * pmdp_invalidate() is required to make sure we don't miss
+ * dirty/young flags set by hardware.
+ */
+ entry = *pmd;
+ pmdp_invalidate(vma, addr, pmd);
+ /*
+ * Recover dirty/young flags. It relies on pmdp_invalidate to not
+ * corrupt them.
+ */
+ if (pmd_dirty(*pmd))
+ entry = pmd_mkdirty(entry);
+ if (pmd_young(*pmd))
+ entry = pmd_mkyoung(entry);
+
+ entry = pmd_modify(entry, newprot);
+ if (preserve_write)
+ entry = pmd_mk_savedwrite(entry);
+ ret = HPAGE_PMD_NR;
+ set_pmd_at(mm, addr, pmd, entry);
+ BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
+ spin_unlock(ptl);
return ret;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3d0aab9ee80d..e5828875f7bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4403,7 +4403,9 @@ int hugetlb_reserve_pages(struct inode *inode,
return 0;
out_err:
if (!vma || vma->vm_flags & VM_MAYSHARE)
- region_abort(resv_map, from, to);
+ /* Don't call region_abort if region_chg failed */
+ if (chg >= 0)
+ region_abort(resv_map, from, to);
if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
kref_put(&resv_map->refs, resv_map_release);
return ret;
@@ -4651,6 +4653,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
{
struct page *page = NULL;
spinlock_t *ptl;
+ pte_t pte;
retry:
ptl = pmd_lockptr(mm, pmd);
spin_lock(ptl);
@@ -4660,12 +4663,13 @@ retry:
*/
if (!pmd_huge(*pmd))
goto out;
- if (pmd_present(*pmd)) {
+ pte = huge_ptep_get((pte_t *)pmd);
+ if (pte_present(pte)) {
page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
if (flags & FOLL_GET)
get_page(page);
} else {
- if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+ if (is_hugetlb_entry_migration(pte)) {
spin_unlock(ptl);
__migration_entry_wait(mm, (pte_t *)pmd, ptl);
goto retry;
diff --git a/mm/internal.h b/mm/internal.h
index ccfc2a2969f4..266efaeaa370 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
enum ttu_flags;
struct tlbflush_unmap_batch;
+
+/*
+ * only for MM internal work items which do not depend on
+ * any allocations or locks which might depend on allocations
+ */
+extern struct workqueue_struct *mm_percpu_wq;
+
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 1c260e6b3b3c..dd2dea8eb077 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -96,11 +96,6 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
<< KASAN_SHADOW_SCALE_SHIFT);
}
-static inline bool kasan_report_enabled(void)
-{
- return !current->kasan_depth;
-}
-
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);
void kasan_report_double_free(struct kmem_cache *cache, void *object,
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index f479365530b6..ab42a0803f16 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,7 +13,9 @@
*
*/
+#include <linux/bitops.h>
#include <linux/ftrace.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/printk.h>
@@ -293,6 +295,40 @@ static void kasan_report_error(struct kasan_access_info *info)
kasan_end_report(&flags);
}
+static unsigned long kasan_flags;
+
+#define KASAN_BIT_REPORTED 0
+#define KASAN_BIT_MULTI_SHOT 1
+
+bool kasan_save_enable_multi_shot(void)
+{
+ return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot);
+
+void kasan_restore_multi_shot(bool enabled)
+{
+ if (!enabled)
+ clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+}
+EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
+
+static int __init kasan_set_multi_shot(char *str)
+{
+ set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags);
+ return 1;
+}
+__setup("kasan_multi_shot", kasan_set_multi_shot);
+
+static inline bool kasan_report_enabled(void)
+{
+ if (current->kasan_depth)
+ return false;
+ if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
+ return true;
+ return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
+}
+
void kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip)
{
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 26c874e90b12..20036d4f9f13 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1416,7 +1416,7 @@ static void kmemleak_scan(void)
/* data/bss scanning */
scan_large_block(_sdata, _edata);
scan_large_block(__bss_start, __bss_stop);
- scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init);
+ scan_large_block(__start_ro_after_init, __end_ro_after_init);
#ifdef CONFIG_SMP
/* per-cpu sections scanning */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 295479b792ec..6fa7208bcd56 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -125,9 +125,12 @@ void put_online_mems(void)
}
+/* Serializes write accesses to mem_hotplug.active_writer. */
+static DEFINE_MUTEX(memory_add_remove_lock);
+
void mem_hotplug_begin(void)
{
- assert_held_device_hotplug();
+ mutex_lock(&memory_add_remove_lock);
mem_hotplug.active_writer = current;
@@ -147,6 +150,7 @@ void mem_hotplug_done(void)
mem_hotplug.active_writer = NULL;
mutex_unlock(&mem_hotplug.lock);
memhp_lock_release();
+ mutex_unlock(&memory_add_remove_lock);
}
/* add this memory to iomem resource */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 75b2745bac41..37d0b334bfe9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1529,7 +1529,6 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
compat_ulong_t, maxnode)
{
- long err = 0;
unsigned long __user *nm = NULL;
unsigned long nr_bits, alloc_size;
DECLARE_BITMAP(bm, MAX_NUMNODES);
@@ -1538,14 +1537,13 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask) {
- err = compat_get_bitmap(bm, nmask, nr_bits);
+ if (compat_get_bitmap(bm, nmask, nr_bits))
+ return -EFAULT;
nm = compat_alloc_user_space(alloc_size);
- err |= copy_to_user(nm, bm, alloc_size);
+ if (copy_to_user(nm, bm, alloc_size))
+ return -EFAULT;
}
- if (err)
- return -EFAULT;
-
return sys_set_mempolicy(mode, nm, nr_bits+1);
}
@@ -1553,7 +1551,6 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
compat_ulong_t, mode, compat_ulong_t __user *, nmask,
compat_ulong_t, maxnode, compat_ulong_t, flags)
{
- long err = 0;
unsigned long __user *nm = NULL;
unsigned long nr_bits, alloc_size;
nodemask_t bm;
@@ -1562,14 +1559,13 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
if (nmask) {
- err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
+ if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits))
+ return -EFAULT;
nm = compat_alloc_user_space(alloc_size);
- err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
+ if (copy_to_user(nm, nodes_addr(bm), alloc_size))
+ return -EFAULT;
}
- if (err)
- return -EFAULT;
-
return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 9a0897a14d37..738f1d5f8350 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -184,9 +184,9 @@ void putback_movable_pages(struct list_head *l)
unlock_page(page);
put_page(page);
} else {
- putback_lru_page(page);
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
+ putback_lru_page(page);
}
}
}
@@ -209,8 +209,11 @@ static int remove_migration_pte(struct page *page, struct vm_area_struct *vma,
VM_BUG_ON_PAGE(PageTail(page), page);
while (page_vma_mapped_walk(&pvmw)) {
- new = page - pvmw.page->index +
- linear_page_index(vma, pvmw.address);
+ if (PageKsm(page))
+ new = page;
+ else
+ new = page - pvmw.page->index +
+ linear_page_index(vma, pvmw.address);
get_page(new);
pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6cbde310abed..07efbc3a8656 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1090,10 +1090,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
int migratetype = 0;
int batch_free = 0;
- unsigned long nr_scanned, flags;
+ unsigned long nr_scanned;
bool isolated_pageblocks;
- spin_lock_irqsave(&zone->lock, flags);
+ spin_lock(&zone->lock);
isolated_pageblocks = has_isolate_pageblock(zone);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
@@ -1142,7 +1142,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
trace_mm_page_pcpu_drain(page, 0, mt);
} while (--count && --batch_free && !list_empty(list));
}
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
@@ -1150,9 +1150,8 @@ static void free_one_page(struct zone *zone,
unsigned int order,
int migratetype)
{
- unsigned long nr_scanned, flags;
- spin_lock_irqsave(&zone->lock, flags);
- __count_vm_events(PGFREE, 1 << order);
+ unsigned long nr_scanned;
+ spin_lock(&zone->lock);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
@@ -1162,7 +1161,7 @@ static void free_one_page(struct zone *zone,
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype);
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
}
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1240,6 +1239,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
static void __free_pages_ok(struct page *page, unsigned int order)
{
+ unsigned long flags;
int migratetype;
unsigned long pfn = page_to_pfn(page);
@@ -1247,7 +1247,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
return;
migratetype = get_pfnblock_migratetype(page, pfn);
+ local_irq_save(flags);
+ __count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, pfn, order, migratetype);
+ local_irq_restore(flags);
}
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2219,9 +2222,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
int migratetype, bool cold)
{
int i, alloced = 0;
- unsigned long flags;
- spin_lock_irqsave(&zone->lock, flags);
+ spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
@@ -2257,7 +2259,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
return alloced;
}
@@ -2373,6 +2375,13 @@ void drain_all_pages(struct zone *zone)
*/
static cpumask_t cpus_with_pcps;
+ /*
+ * Make sure nobody triggers this path before mm_percpu_wq is fully
+ * initialized.
+ */
+ if (WARN_ON_ONCE(!mm_percpu_wq))
+ return;
+
/* Workqueues cannot recurse */
if (current->flags & PF_WQ_WORKER)
return;
@@ -2422,7 +2431,7 @@ void drain_all_pages(struct zone *zone)
for_each_cpu(cpu, &cpus_with_pcps) {
struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
INIT_WORK(work, drain_local_pages_wq);
- schedule_work_on(cpu, work);
+ queue_work_on(cpu, mm_percpu_wq, work);
}
for_each_cpu(cpu, &cpus_with_pcps)
flush_work(per_cpu_ptr(&pcpu_drain, cpu));
@@ -2478,20 +2487,17 @@ void free_hot_cold_page(struct page *page, bool cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
+ unsigned long flags;
unsigned long pfn = page_to_pfn(page);
int migratetype;
- if (in_interrupt()) {
- __free_pages_ok(page, 0);
- return;
- }
-
if (!free_pcp_prepare(page))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
set_pcppage_migratetype(page, migratetype);
- preempt_disable();
+ local_irq_save(flags);
+ __count_vm_event(PGFREE);
/*
* We only track unmovable, reclaimable and movable on pcp lists.
@@ -2508,7 +2514,6 @@ void free_hot_cold_page(struct page *page, bool cold)
migratetype = MIGRATE_MOVABLE;
}
- __count_vm_event(PGFREE);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
if (!cold)
list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2522,7 +2527,7 @@ void free_hot_cold_page(struct page *page, bool cold)
}
out:
- preempt_enable();
+ local_irq_restore(flags);
}
/*
@@ -2647,8 +2652,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
{
struct page *page;
- VM_BUG_ON(in_interrupt());
-
do {
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
@@ -2679,8 +2682,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct list_head *list;
bool cold = ((gfp_flags & __GFP_COLD) != 0);
struct page *page;
+ unsigned long flags;
- preempt_disable();
+ local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
@@ -2688,7 +2692,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
}
- preempt_enable();
+ local_irq_restore(flags);
return page;
}
@@ -2704,7 +2708,7 @@ struct page *rmqueue(struct zone *preferred_zone,
unsigned long flags;
struct page *page;
- if (likely(order == 0) && !in_interrupt()) {
+ if (likely(order == 0)) {
page = rmqueue_pcplist(preferred_zone, zone, order,
gfp_flags, migratetype);
goto out;
@@ -4519,13 +4523,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_FILE_MAPPED)),
K(node_page_state(pgdat, NR_FILE_DIRTY)),
K(node_page_state(pgdat, NR_WRITEBACK)),
+ K(node_page_state(pgdat, NR_SHMEM)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
* HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
#endif
- K(node_page_state(pgdat, NR_SHMEM)),
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
node_page_state(pgdat, NR_PAGES_SCANNED),
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index c4c9def8ffea..de9c40d7304a 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (pvmw->pmd && !pvmw->pte)
return not_found(pvmw);
- /* Only for THP, seek to next pte entry makes sense */
- if (pvmw->pte) {
- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
- return not_found(pvmw);
+ if (pvmw->pte)
goto next_pte;
- }
if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */
@@ -165,9 +161,14 @@ restart:
while (1) {
if (check_pte(pvmw))
return true;
-next_pte: do {
+next_pte:
+ /* Seek to next pte only makes sense for THP */
+ if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+ return not_found(pvmw);
+ do {
pvmw->address += PAGE_SIZE;
- if (pvmw->address >=
+ if (pvmw->address >= pvmw->vma->vm_end ||
+ pvmw->address >=
__vma_address(pvmw->page, pvmw->vma) +
hpage_nr_pages(pvmw->page) * PAGE_SIZE)
return not_found(pvmw);
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 538998a137d2..9ac639499bd1 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
/**
* pcpu_get_pages - get temp pages array
- * @chunk: chunk of interest
*
* Returns pointer to array of pointers to struct page which can be indexed
* with pcpu_page_idx(). Note that there is only one array and accesses
@@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
* RETURNS:
* Pointer to temp pages array on success.
*/
-static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
+static struct page **pcpu_get_pages(void)
{
static struct page **pages;
size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
@@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
{
struct page **pages;
- pages = pcpu_get_pages(chunk);
+ pages = pcpu_get_pages();
if (!pages)
return -ENOMEM;
@@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
* successful population attempt so the temp pages array must
* be available now.
*/
- pages = pcpu_get_pages(chunk);
+ pages = pcpu_get_pages();
BUG_ON(!pages);
/* unmap and free */
diff --git a/mm/percpu.c b/mm/percpu.c
index 5696039b5c07..60a6488e9e6d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1011,8 +1011,11 @@ area_found:
mutex_unlock(&pcpu_alloc_mutex);
}
- if (chunk != pcpu_reserved_chunk)
+ if (chunk != pcpu_reserved_chunk) {
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_nr_empty_pop_pages -= occ_pages;
+ spin_unlock_irqrestore(&pcpu_lock, flags);
+ }
if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
pcpu_schedule_balance_work();
diff --git a/mm/rmap.c b/mm/rmap.c
index 8ffd59df8a3f..1b776f793998 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1159,7 +1159,7 @@ void page_add_file_rmap(struct page *page, bool compound)
goto out;
}
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
out:
unlock_page_memcg(page);
}
@@ -1199,7 +1199,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
diff --git a/mm/swap.c b/mm/swap.c
index c4910f14f957..5dabf444d724 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
-/*
- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
- * workqueue, aiding in getting memory freed.
- */
-static struct workqueue_struct *lru_add_drain_wq;
-
-static int __init lru_init(void)
-{
- lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
-
- if (WARN(!lru_add_drain_wq,
- "Failed to create workqueue lru_add_drain_wq"))
- return -ENOMEM;
-
- return 0;
-}
-early_initcall(lru_init);
-
void lru_add_drain_all(void)
{
static DEFINE_MUTEX(lock);
static struct cpumask has_work;
int cpu;
+ /*
+ * Make sure nobody triggers this path before mm_percpu_wq is fully
+ * initialized.
+ */
+ if (WARN_ON(!mm_percpu_wq))
+ return;
+
mutex_lock(&lock);
get_online_cpus();
cpumask_clear(&has_work);
@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
- queue_work_on(cpu, lru_add_drain_wq, work);
+ queue_work_on(cpu, mm_percpu_wq, work);
cpumask_set_cpu(cpu, &has_work);
}
}
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index 310ac0b8f974..ac6318a064d3 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type)
struct page *page = map[i];
if (page)
__free_page(page);
+ if (!(i % SWAP_CLUSTER_MAX))
+ cond_resched();
}
vfree(map);
}
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 9b5bc86f96ad..b1ccb58ad397 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry)
{
struct swap_slots_cache *cache;
- BUG_ON(!swap_slot_cache_initialized);
-
cache = &get_cpu_var(swp_slots);
if (use_swap_slot_cache && cache->slots_ret) {
spin_lock_irq(&cache->free_lock);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0dd80222b20b..0b057628a7ba 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
if (fatal_signal_pending(current)) {
area->nr_pages = i;
- goto fail;
+ goto fail_no_warn;
}
if (node == NUMA_NO_NODE)
@@ -1709,6 +1709,7 @@ fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
(area->nr_pages*PAGE_SIZE), area->size);
+fail_no_warn:
vfree(area->addr);
return NULL;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b1947f0cbee2..5a4f5c5a31e8 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SMP
-static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the
* update worker thread.
*/
- queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+ queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
}
@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
if (!delayed_work_pending(dw) && need_update(cpu))
- queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+ queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
}
put_online_cpus();
@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void)
INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
vmstat_update);
- vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
}
@@ -1764,11 +1762,15 @@ static int vmstat_cpu_dead(unsigned int cpu)
#endif
-static int __init setup_vmstat(void)
+struct workqueue_struct *mm_percpu_wq;
+
+void __init init_mm_internals(void)
{
-#ifdef CONFIG_SMP
- int ret;
+ int ret __maybe_unused;
+
+ mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
+#ifdef CONFIG_SMP
ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
NULL, vmstat_cpu_dead);
if (ret < 0)
@@ -1792,9 +1794,7 @@ static int __init setup_vmstat(void)
proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
- return 0;
}
-module_init(setup_vmstat)
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
diff --git a/mm/workingset.c b/mm/workingset.c
index ac839fca0e76..eda05c71fa49 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -532,7 +532,7 @@ static int __init workingset_init(void)
pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
timestamp_bits, max_order, bucket_order);
- ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
+ ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
if (ret)
goto err;
ret = register_shrinker(&workingset_shadow_shrinker);
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 8970a2fd3b1a..54f63c4a809a 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
spin_lock(&zhdr->page_lock);
}
+/* Try to lock a z3fold page */
+static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
+{
+ return spin_trylock(&zhdr->page_lock);
+}
+
/* Unlock a z3fold page */
static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
{
@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_lock(&pool->lock);
zhdr = list_first_entry_or_null(&pool->unbuddied[i],
struct z3fold_header, buddy);
- if (!zhdr) {
+ if (!zhdr || !z3fold_page_trylock(zhdr)) {
spin_unlock(&pool->lock);
continue;
}
@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
spin_unlock(&pool->lock);
page = virt_to_page(zhdr);
- z3fold_page_lock(zhdr);
if (zhdr->first_chunks == 0) {
if (zhdr->middle_chunks != 0 &&
chunks >= zhdr->start_middle)
@@ -667,6 +672,7 @@ next:
z3fold_page_unlock(zhdr);
spin_lock(&pool->lock);
if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+ spin_unlock(&pool->lock);
atomic64_dec(&pool->pages_nr);
return 0;
}
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b7ee9c34dbd6..d41edd28298b 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -276,7 +276,7 @@ struct zs_pool {
struct zspage {
struct {
unsigned int fullness:FULLNESS_BITS;
- unsigned int class:CLASS_BITS;
+ unsigned int class:CLASS_BITS + 1;
unsigned int isolated:ISOLATED_BITS;
unsigned int magic:MAGIC_VAL_BITS;
};