Diffstat (limited to 'mm')
-rw-r--r--  mm/cma.c                4
-rw-r--r--  mm/compaction.c        17
-rw-r--r--  mm/debug.c              8
-rw-r--r--  mm/debug_vm_pgtable.c   4
-rw-r--r--  mm/filemap.c           23
-rw-r--r--  mm/gup.c                4
-rw-r--r--  mm/hugetlb.c           17
-rw-r--r--  mm/khugepaged.c         3
-rw-r--r--  mm/maccess.c           61
-rw-r--r--  mm/memcontrol.c        31
-rw-r--r--  mm/memory.c            35
-rw-r--r--  mm/memory_hotplug.c    13
-rw-r--r--  mm/migrate.c           13
-rw-r--r--  mm/mmap.c              16
-rw-r--r--  mm/mremap.c            23
-rw-r--r--  mm/nommu.c             17
-rw-r--r--  mm/page_alloc.c         2
-rw-r--r--  mm/rodata_test.c        2
-rw-r--r--  mm/shmem.c              2
-rw-r--r--  mm/slab.h               4
-rw-r--r--  mm/slab_common.c       37
-rw-r--r--  mm/slub.c              21
-rw-r--r--  mm/swap.c               3
-rw-r--r--  mm/swap_state.c         4
-rw-r--r--  mm/vmalloc.c           21
-rw-r--r--  mm/vmscan.c             3
-rw-r--r--  mm/workingset.c        46
27 files changed, 255 insertions(+), 179 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index 0463ad2ce06b..26ecff818881 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -339,13 +339,13 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
*/
if (base < highmem_start && limit > highmem_start) {
addr = memblock_alloc_range_nid(size, alignment,
- highmem_start, limit, nid, false);
+ highmem_start, limit, nid, true);
limit = highmem_start;
}
if (!addr) {
addr = memblock_alloc_range_nid(size, alignment, base,
- limit, nid, false);
+ limit, nid, true);
if (!addr) {
ret = -ENOMEM;
goto err;
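
The flipped boolean here is the trailing exact_nid argument of memblock_alloc_range_nid(). A sketch of the contract as of this kernel; the prototype below is my recollection of mm/memblock.c, so treat it as an assumption:

phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t align,
				     phys_addr_t start, phys_addr_t end,
				     int nid, bool exact_nid);
/*
 * exact_nid == false: if @nid has no suitable free range, fall back to
 *                     any other node.
 * exact_nid == true:  return 0 (failure) instead of falling back, so a
 *                     per-NUMA CMA area can never silently land on the
 *                     wrong node.
 */
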
diff --git a/mm/compaction.c b/mm/compaction.c
index fd988b7e5f2b..86375605faa9 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2316,15 +2316,26 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.page = NULL,
};
- current->capture_control = &capc;
+ /*
+ * Make sure the structs are really initialized before we expose the
+ * capture control, in case we are interrupted and the interrupt handler
+ * frees a page.
+ */
+ barrier();
+ WRITE_ONCE(current->capture_control, &capc);
ret = compact_zone(&cc, &capc);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
- *capture = capc.page;
- current->capture_control = NULL;
+ /*
+ * Make sure we hide capture control first before we read the captured
+ * page pointer, otherwise an interrupt could free and capture a page
+ * and we would leak it.
+ */
+ WRITE_ONCE(current->capture_control, NULL);
+ *capture = READ_ONCE(capc.page);
return ret;
}
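
The ordering requirement here generalizes to any per-task slot that an interrupt handler on the same CPU may fill: initialize, compiler barrier, publish; later hide, then read. A stand-alone sketch of the idiom with hypothetical names, not kernel code:

static struct page **capture_slot;	/* NULL means capture is disabled */

static struct page *capture_one_page(void)
{
	struct page *page = NULL;

	barrier();			  /* init must not sink below here */
	WRITE_ONCE(capture_slot, &page);  /* publish: irq may store now    */

	/* ... do work; an interrupt may fill *capture_slot ... */

	WRITE_ONCE(capture_slot, NULL);	  /* hide before consuming         */
	return READ_ONCE(page);		  /* irq can no longer overwrite   */
}
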
diff --git a/mm/debug.c b/mm/debug.c
index b5b1de8c71ac..4f376514744d 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -120,9 +120,9 @@ void __dump_page(struct page *page, const char *reason)
* mapping can be invalid pointer and we don't want to crash
* accessing it, so probe everything depending on it carefully
*/
- if (probe_kernel_read(&host, &mapping->host,
+ if (copy_from_kernel_nofault(&host, &mapping->host,
sizeof(struct inode *)) ||
- probe_kernel_read(&a_ops, &mapping->a_ops,
+ copy_from_kernel_nofault(&a_ops, &mapping->a_ops,
sizeof(struct address_space_operations *))) {
pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n");
goto out_mapping;
@@ -133,7 +133,7 @@ void __dump_page(struct page *page, const char *reason)
goto out_mapping;
}
- if (probe_kernel_read(&dentry_first,
+ if (copy_from_kernel_nofault(&dentry_first,
&host->i_dentry.first, sizeof(struct hlist_node *))) {
pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n",
a_ops, host);
@@ -146,7 +146,7 @@ void __dump_page(struct page *page, const char *reason)
}
dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
- if (probe_kernel_read(&dentry, dentry_ptr,
+ if (copy_from_kernel_nofault(&dentry, dentry_ptr,
sizeof(struct dentry))) {
pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n",
a_ops, dentry_ptr);
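
copy_from_kernel_nofault() is the renamed probe_kernel_read(): it copies with page faults disabled and returns 0 on success or a negative errno, which is what lets __dump_page() chase pointers out of a possibly-corrupted struct page. A minimal caller, for illustration only:

static void dump_host(struct address_space *mapping)
{
	struct inode *host;

	/* -ERANGE if the address is off limits, -EFAULT on a fault. */
	if (copy_from_kernel_nofault(&host, &mapping->host, sizeof(host))) {
		pr_warn("mapping %px is not a readable kernel address\n",
			mapping);
		return;
	}
	pr_info("mapping->host = %px\n", host);
}
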
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index e45623016aea..61ab16fb2e36 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -246,13 +246,13 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
unsigned long vaddr)
{
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = ptep_get(ptep);
pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
set_pte_at(mm, vaddr, ptep, pte);
barrier();
pte_clear(mm, vaddr, ptep);
- pte = READ_ONCE(*ptep);
+ pte = ptep_get(ptep);
WARN_ON(!pte_none(pte));
}
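
ptep_get() exists because a raw dereference (or READ_ONCE()) can tear on configurations where pte_t is wider than the native word, e.g. 32-bit x86 with PAE; such architectures override the helper to read the halves safely. If I remember the generic header correctly, the fallback is simply:

#ifndef ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
#endif
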
diff --git a/mm/filemap.c b/mm/filemap.c
index f0ae9a6308cb..385759c4ce4b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2028,7 +2028,7 @@ find_page:
page = find_get_page(mapping, index);
if (!page) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
goto would_block;
page_cache_sync_readahead(mapping,
ra, filp,
@@ -2038,6 +2038,10 @@ find_page:
goto no_cached_page;
}
if (PageReadahead(page)) {
+ if (iocb->ki_flags & IOCB_NOIO) {
+ put_page(page);
+ goto out;
+ }
page_cache_async_readahead(mapping,
ra, filp, page,
index, last_index - index);
@@ -2160,6 +2164,11 @@ page_not_up_to_date_locked:
}
readpage:
+ if (iocb->ki_flags & IOCB_NOIO) {
+ unlock_page(page);
+ put_page(page);
+ goto would_block;
+ }
/*
* A previous I/O error may have been due to temporary
* failures, eg. multipath errors.
@@ -2249,9 +2258,19 @@ EXPORT_SYMBOL_GPL(generic_file_buffered_read);
*
* This is the "read_iter()" routine for all filesystems
* that can use the page cache directly.
+ *
+ * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
+ * be returned when no data can be read without waiting for I/O requests
+ * to complete; it doesn't prevent readahead.
+ *
+ * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
+ * requests shall be made for the read or for readahead. When no data
+ * can be read, -EAGAIN shall be returned. When readahead would be
+ * triggered, a partial, possibly empty read shall be returned.
+ *
* Return:
* * number of bytes copied, even for partial reads
- * * negative error code if nothing was read
+ * * negative error code (or 0 if IOCB_NOIO) if nothing was read
*/
ssize_t
generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
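
IOCB_NOIO is a kiocb flag rather than an open(2) flag, so it is opt-in for in-kernel callers. A sketch of such a caller, with buf and len as placeholders:

/* Sketch only: a cache-only read that never starts I/O or readahead. */
static ssize_t read_cached_only(struct file *filp, void *buf, size_t len)
{
	struct kiocb kiocb;
	struct iov_iter iter;
	struct kvec kvec = { .iov_base = buf, .iov_len = len };

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_flags |= IOCB_NOIO;
	iov_iter_kvec(&iter, READ, &kvec, 1, len);

	/* > 0: bytes that were already cached; -EAGAIN: would need I/O;
	 * 0: readahead would have been triggered (partial/empty read). */
	return generic_file_read_iter(&kiocb, &iter);
}
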
diff --git a/mm/gup.c b/mm/gup.c
index de9e36262ccb..6f47697f8fb0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2196,7 +2196,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
*/
static inline pte_t gup_get_pte(pte_t *ptep)
{
- return READ_ONCE(*ptep);
+ return ptep_get(ptep);
}
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
@@ -2425,7 +2425,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
if (pte_end < end)
end = pte_end;
- pte = READ_ONCE(*ptep);
+ pte = huge_ptep_get(ptep);
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 57ece74e3aae..590111ea6975 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -45,7 +45,10 @@ int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
struct hstate hstates[HUGE_MAX_HSTATE];
+#ifdef CONFIG_CMA
static struct cma *hugetlb_cma[MAX_NUMNODES];
+#endif
+static unsigned long hugetlb_cma_size __initdata;
/*
* Minimum page order among possible hugepage sizes, set to a proper value
@@ -1235,9 +1238,10 @@ static void free_gigantic_page(struct page *page, unsigned int order)
* If the page isn't allocated using the cma allocator,
* cma_release() returns false.
*/
- if (IS_ENABLED(CONFIG_CMA) &&
- cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+#ifdef CONFIG_CMA
+ if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
return;
+#endif
free_contig_range(page_to_pfn(page), 1 << order);
}
@@ -1248,7 +1252,8 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
{
unsigned long nr_pages = 1UL << huge_page_order(h);
- if (IS_ENABLED(CONFIG_CMA)) {
+#ifdef CONFIG_CMA
+ {
struct page *page;
int node;
@@ -1262,6 +1267,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
return page;
}
}
+#endif
return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
}
@@ -1593,7 +1599,7 @@ static struct address_space *_get_hugetlb_page_mapping(struct page *hpage)
/* Use first found vma */
pgoff_start = page_to_pgoff(hpage);
- pgoff_end = pgoff_start + hpage_nr_pages(hpage) - 1;
+ pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1;
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
@@ -2571,7 +2577,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
for (i = 0; i < h->max_huge_pages; ++i) {
if (hstate_is_gigantic(h)) {
- if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) {
+ if (hugetlb_cma_size) {
pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
break;
}
@@ -5654,7 +5660,6 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
}
#ifdef CONFIG_CMA
-static unsigned long hugetlb_cma_size __initdata;
static bool cma_reserve_called __initdata;
static int __init cmdline_parse_hugetlb_cma(char *p)
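
The IS_ENABLED(CONFIG_CMA) to #ifdef conversions above are forced by hugetlb_cma[] itself moving under #ifdef: IS_ENABLED() only discards the branch after the compiler has parsed it, so every identifier in the branch must still be declared. In short, with a hypothetical free_one() wrapper:

#ifdef CONFIG_CMA
static struct cma *hugetlb_cma[MAX_NUMNODES];
#endif

void free_one(struct page *page, unsigned int order)
{
	/* Fine: the preprocessor removes the block when CONFIG_CMA=n. */
#ifdef CONFIG_CMA
	if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
		return;
#endif
	/* Would not compile with CONFIG_CMA=n, because hugetlb_cma is
	 * undeclared even though IS_ENABLED(CONFIG_CMA) folds to 0:
	 *
	 * if (IS_ENABLED(CONFIG_CMA) &&
	 *     cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
	 *	return;
	 */
	free_contig_range(page_to_pfn(page), 1 << order);
}
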
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b043c40a21d4..700f5160f3e4 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -958,6 +958,9 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
return SCAN_ADDRESS_RANGE;
if (!hugepage_vma_check(vma, vma->vm_flags))
return SCAN_VMA_CHECK;
+ /* Anon VMA expected */
+ if (!vma->anon_vma || vma->vm_ops)
+ return SCAN_VMA_CHECK;
return 0;
}
diff --git a/mm/maccess.c b/mm/maccess.c
index 88845eda5047..f98ff91e32c6 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -6,14 +6,15 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
-bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size)
+bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
+ size_t size)
{
return true;
}
#ifdef HAVE_GET_KERNEL_NOFAULT
-#define probe_kernel_read_loop(dst, src, len, type, err_label) \
+#define copy_from_kernel_nofault_loop(dst, src, len, type, err_label) \
while (len >= sizeof(type)) { \
__get_kernel_nofault(dst, src, type, err_label); \
dst += sizeof(type); \
@@ -21,25 +22,25 @@ bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size)
len -= sizeof(type); \
}
-long probe_kernel_read(void *dst, const void *src, size_t size)
+long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
{
- if (!probe_kernel_read_allowed(src, size))
+ if (!copy_from_kernel_nofault_allowed(src, size))
return -ERANGE;
pagefault_disable();
- probe_kernel_read_loop(dst, src, size, u64, Efault);
- probe_kernel_read_loop(dst, src, size, u32, Efault);
- probe_kernel_read_loop(dst, src, size, u16, Efault);
- probe_kernel_read_loop(dst, src, size, u8, Efault);
+ copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
+ copy_from_kernel_nofault_loop(dst, src, size, u32, Efault);
+ copy_from_kernel_nofault_loop(dst, src, size, u16, Efault);
+ copy_from_kernel_nofault_loop(dst, src, size, u8, Efault);
pagefault_enable();
return 0;
Efault:
pagefault_enable();
return -EFAULT;
}
-EXPORT_SYMBOL_GPL(probe_kernel_read);
+EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
-#define probe_kernel_write_loop(dst, src, len, type, err_label) \
+#define copy_to_kernel_nofault_loop(dst, src, len, type, err_label) \
while (len >= sizeof(type)) { \
__put_kernel_nofault(dst, src, type, err_label); \
dst += sizeof(type); \
@@ -47,13 +48,13 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
len -= sizeof(type); \
}
-long probe_kernel_write(void *dst, const void *src, size_t size)
+long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
{
pagefault_disable();
- probe_kernel_write_loop(dst, src, size, u64, Efault);
- probe_kernel_write_loop(dst, src, size, u32, Efault);
- probe_kernel_write_loop(dst, src, size, u16, Efault);
- probe_kernel_write_loop(dst, src, size, u8, Efault);
+ copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
+ copy_to_kernel_nofault_loop(dst, src, size, u32, Efault);
+ copy_to_kernel_nofault_loop(dst, src, size, u16, Efault);
+ copy_to_kernel_nofault_loop(dst, src, size, u8, Efault);
pagefault_enable();
return 0;
Efault:
@@ -67,7 +68,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
if (unlikely(count <= 0))
return 0;
- if (!probe_kernel_read_allowed(unsafe_addr, count))
+ if (!copy_from_kernel_nofault_allowed(unsafe_addr, count))
return -ERANGE;
pagefault_disable();
@@ -87,7 +88,7 @@ Efault:
}
#else /* HAVE_GET_KERNEL_NOFAULT */
/**
- * probe_kernel_read(): safely attempt to read from kernel-space
+ * copy_from_kernel_nofault(): safely attempt to read from kernel-space
* @dst: pointer to the buffer that shall take the data
* @src: address to read from
* @size: size of the data chunk
@@ -98,15 +99,15 @@ Efault:
*
* We ensure that the copy_from_user is executed in atomic context so that
* do_page_fault() doesn't attempt to take mmap_lock. This makes
- * probe_kernel_read() suitable for use within regions where the caller
+ * copy_from_kernel_nofault() suitable for use within regions where the caller
* already holds mmap_lock, or other locks which nest inside mmap_lock.
*/
-long probe_kernel_read(void *dst, const void *src, size_t size)
+long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
{
long ret;
mm_segment_t old_fs = get_fs();
- if (!probe_kernel_read_allowed(src, size))
+ if (!copy_from_kernel_nofault_allowed(src, size))
return -ERANGE;
set_fs(KERNEL_DS);
@@ -120,10 +121,10 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL_GPL(probe_kernel_read);
+EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
/**
- * probe_kernel_write(): safely attempt to write to a location
+ * copy_to_kernel_nofault(): safely attempt to write to a location
* @dst: address to write to
* @src: pointer to the data that shall be written
* @size: size of the data chunk
@@ -131,7 +132,7 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
-long probe_kernel_write(void *dst, const void *src, size_t size)
+long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
{
long ret;
mm_segment_t old_fs = get_fs();
@@ -174,7 +175,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
if (unlikely(count <= 0))
return 0;
- if (!probe_kernel_read_allowed(unsafe_addr, count))
+ if (!copy_from_kernel_nofault_allowed(unsafe_addr, count))
return -ERANGE;
set_fs(KERNEL_DS);
@@ -193,7 +194,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
#endif /* HAVE_GET_KERNEL_NOFAULT */
/**
- * probe_user_read(): safely attempt to read from a user-space location
+ * copy_from_user_nofault(): safely attempt to read from a user-space location
* @dst: pointer to the buffer that shall take the data
* @src: address to read from. This must be a user address.
* @size: size of the data chunk
@@ -201,7 +202,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
* Safely read from user address @src to the buffer at @dst. If a kernel fault
* happens, handle that and return -EFAULT.
*/
-long probe_user_read(void *dst, const void __user *src, size_t size)
+long copy_from_user_nofault(void *dst, const void __user *src, size_t size)
{
long ret = -EFAULT;
mm_segment_t old_fs = get_fs();
@@ -218,10 +219,10 @@ long probe_user_read(void *dst, const void __user *src, size_t size)
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL_GPL(probe_user_read);
+EXPORT_SYMBOL_GPL(copy_from_user_nofault);
/**
- * probe_user_write(): safely attempt to write to a user-space location
+ * copy_to_user_nofault(): safely attempt to write to a user-space location
* @dst: address to write to
* @src: pointer to the data that shall be written
* @size: size of the data chunk
@@ -229,7 +230,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
-long probe_user_write(void __user *dst, const void *src, size_t size)
+long copy_to_user_nofault(void __user *dst, const void *src, size_t size)
{
long ret = -EFAULT;
mm_segment_t old_fs = get_fs();
@@ -246,7 +247,7 @@ long probe_user_write(void __user *dst, const void *src, size_t size)
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL_GPL(probe_user_write);
+EXPORT_SYMBOL_GPL(copy_to_user_nofault);
/**
* strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user
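
For readers tracking callers through this rename, the mapping is mechanical; arguments and return conventions are unchanged:

/* probe_kernel_read(dst, src, size)  -> copy_from_kernel_nofault(dst, src, size) */
/* probe_kernel_write(dst, src, size) -> copy_to_kernel_nofault(dst, src, size)   */
/* probe_user_read(dst, src, size)    -> copy_from_user_nofault(dst, src, size)   */
/* probe_user_write(dst, src, size)   -> copy_to_user_nofault(dst, src, size)     */
/* All return 0 on success and -EFAULT on a fault; the kernel-space readers also  */
/* return -ERANGE when copy_from_kernel_nofault_allowed() rejects the address.    */
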
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0b38b6ad547d..13f559af1ab6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2772,8 +2772,10 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
return;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
- if (!cw)
+ if (!cw) {
+ css_put(&memcg->css);
return;
+ }
cw->memcg = memcg;
cw->cachep = cachep;
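
The leak fixed here comes from the css reference taken earlier in this function (via css_tryget_online(), if I recall the surrounding code correctly), which the kmalloc() failure path previously never dropped. In outline:

	if (!css_tryget_online(&memcg->css))	/* takes the reference */
		return;

	cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
	if (!cw) {
		css_put(&memcg->css);	/* pair it on every early return */
		return;
	}
	/* ... otherwise the queued worker drops the reference later ... */
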
@@ -5667,7 +5669,6 @@ static void __mem_cgroup_clear_mc(void)
if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap);
- mem_cgroup_id_get_many(mc.to, mc.moved_swap);
css_put_many(&mc.to->css, mc.moved_swap);
mc.moved_swap = 0;
@@ -5858,7 +5859,8 @@ put: /* get_mctgt_type() gets the page */
ent = target.ent;
if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
mc.precharge--;
- /* we fixup refcnts and charges later. */
+ mem_cgroup_id_get_many(mc.to, 1);
+ /* we fixup other refcnts and charges later. */
mc.moved_swap++;
}
break;
@@ -6360,11 +6362,16 @@ static unsigned long effective_protection(unsigned long usage,
* We're using unprotected memory for the weight so that if
* some cgroups DO claim explicit protection, we don't protect
* the same bytes twice.
+ *
+ * Check both usage and parent_usage against the respective
+ * protected values. One should imply the other, but they
+ * aren't read atomically - make sure the division is sane.
*/
if (!(cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT))
return ep;
-
- if (parent_effective > siblings_protected && usage > protected) {
+ if (parent_effective > siblings_protected &&
+ parent_usage > siblings_protected &&
+ usage > protected) {
unsigned long unclaimed;
unclaimed = parent_effective - siblings_protected;
@@ -6416,7 +6423,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
if (parent == root) {
memcg->memory.emin = READ_ONCE(memcg->memory.min);
- memcg->memory.elow = memcg->memory.low;
+ memcg->memory.elow = READ_ONCE(memcg->memory.low);
goto out;
}
@@ -6428,7 +6435,8 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
atomic_long_read(&parent->memory.children_min_usage)));
WRITE_ONCE(memcg->memory.elow, effective_protection(usage, parent_usage,
- memcg->memory.low, READ_ONCE(parent->memory.elow),
+ READ_ONCE(memcg->memory.low),
+ READ_ONCE(parent->memory.elow),
atomic_long_read(&parent->memory.children_low_usage)));
out:
@@ -7178,6 +7186,13 @@ static struct cftype memsw_files[] = {
{ }, /* terminate */
};
+/*
+ * If mem_cgroup_swap_init() is implemented as a subsys_initcall()
+ * instead of a core_initcall(), this could mean cgroup_memory_noswap still
+ * remains set to false even when memcg is disabled via "cgroup_disable=memory"
+ * boot parameter. This may result in premature OOPS inside
+ * mem_cgroup_get_nr_swap_pages() function in corner cases.
+ */
static int __init mem_cgroup_swap_init(void)
{
/* No memory control -> no swap control */
@@ -7192,6 +7207,6 @@ static int __init mem_cgroup_swap_init(void)
return 0;
}
-subsys_initcall(mem_cgroup_swap_init);
+core_initcall(mem_cgroup_swap_init);
#endif /* CONFIG_MEMCG_SWAP */
diff --git a/mm/memory.c b/mm/memory.c
index dc7f3543b1fd..3ecad55103ad 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1498,7 +1498,7 @@ out:
}
#ifdef pte_index
-static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
+static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
unsigned long addr, struct page *page, pgprot_t prot)
{
int err;
@@ -1506,8 +1506,9 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd,
if (!page_count(page))
return -EINVAL;
err = validate_page_before_insert(page);
- return err ? err : insert_page_into_pte_locked(
- mm, pte_offset_map(pmd, addr), addr, page, prot);
+ if (err)
+ return err;
+ return insert_page_into_pte_locked(mm, pte, addr, page, prot);
}
/* insert_pages() amortizes the cost of spinlock operations
@@ -1517,7 +1518,8 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num, pgprot_t prot)
{
pmd_t *pmd = NULL;
- spinlock_t *pte_lock = NULL;
+ pte_t *start_pte, *pte;
+ spinlock_t *pte_lock;
struct mm_struct *const mm = vma->vm_mm;
unsigned long curr_page_idx = 0;
unsigned long remaining_pages_total = *num;
@@ -1536,18 +1538,17 @@ more:
ret = -ENOMEM;
if (pte_alloc(mm, pmd))
goto out;
- pte_lock = pte_lockptr(mm, pmd);
while (pages_to_write_in_pmd) {
int pte_idx = 0;
const int batch_size = min_t(int, pages_to_write_in_pmd, 8);
- spin_lock(pte_lock);
- for (; pte_idx < batch_size; ++pte_idx) {
- int err = insert_page_in_batch_locked(mm, pmd,
+ start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
+ for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
+ int err = insert_page_in_batch_locked(mm, pte,
addr, pages[curr_page_idx], prot);
if (unlikely(err)) {
- spin_unlock(pte_lock);
+ pte_unmap_unlock(start_pte, pte_lock);
ret = err;
remaining_pages_total -= pte_idx;
goto out;
@@ -1555,7 +1556,7 @@ more:
addr += PAGE_SIZE;
++curr_page_idx;
}
- spin_unlock(pte_lock);
+ pte_unmap_unlock(start_pte, pte_lock);
pages_to_write_in_pmd -= batch_size;
remaining_pages_total -= batch_size;
}
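
pte_offset_map_lock()/pte_unmap_unlock() is the canonical pairing: it maps the PTE page (page tables can live in highmem on 32-bit configs) and takes the per-PMD PTE lock in one step, where the old code only took the lock. The idiom:

	pte_t *pte;
	spinlock_t *ptl;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	/* ... *pte may be read or modified; the table stays mapped ... */
	pte_unmap_unlock(pte, ptl);
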
@@ -1600,7 +1601,7 @@ int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
return insert_pages(vma, addr, pages, num, vma->vm_page_prot);
#else
unsigned long idx = 0, pgcount = *num;
- int err;
+ int err = -EINVAL;
for (; idx < pgcount; ++idx) {
err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]);
@@ -3140,8 +3141,18 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
err = mem_cgroup_charge(page, vma->vm_mm,
GFP_KERNEL);
ClearPageSwapCache(page);
- if (err)
+ if (err) {
+ ret = VM_FAULT_OOM;
goto out_page;
+ }
+
+ /*
+ * XXX: Move to lru_cache_add() when it
+ * supports new vs putback
+ */
+ spin_lock_irq(&page_pgdat(page)->lru_lock);
+ lru_note_cost_page(page);
+ spin_unlock_irq(&page_pgdat(page)->lru_lock);
lru_cache_add(page);
swap_readpage(page, true);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9b34e03e730a..da374cd3d45b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -471,11 +471,20 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages)
{
+ const unsigned long end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
- unsigned long flags;
+ unsigned long pfn, cur_nr_pages, flags;
/* Poison struct pages because they are now uninitialized again. */
- page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+ for (pfn = start_pfn; pfn < end_pfn; pfn += cur_nr_pages) {
+ cond_resched();
+
+ /* Select all remaining pages up to the next section boundary */
+ cur_nr_pages =
+ min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
+ page_init_poison(pfn_to_page(pfn),
+ sizeof(struct page) * cur_nr_pages);
+ }
#ifdef CONFIG_ZONE_DEVICE
/*
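
Chunking the poisoning per memory section bounds each page_init_poison() call and lets cond_resched() run between chunks, avoiding soft lockups when huge ranges are removed. SECTION_ALIGN_UP() is the sparsemem helper; as I remember it from linux/mmzone.h:

#define SECTION_ALIGN_UP(pfn) \
	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)

/* Hence SECTION_ALIGN_UP(pfn + 1) - pfn is the number of pages from
 * pfn up to, and not past, the next section boundary. */
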
diff --git a/mm/migrate.c b/mm/migrate.c
index f37729673558..40cd7016ae6f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1161,21 +1161,10 @@ out:
}
/*
- * gcc 4.7 and 4.8 on arm get an ICE when inlining unmap_and_move(). Work
- * around it.
- */
-#if defined(CONFIG_ARM) && \
- defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700
-#define ICE_noinline noinline
-#else
-#define ICE_noinline
-#endif
-
-/*
* Obtain the lock on page, remove all ptes and migrate the page
* to the newly allocated page in newpage.
*/
-static ICE_noinline int unmap_and_move(new_page_t get_new_page,
+static int unmap_and_move(new_page_t get_new_page,
free_page_t put_new_page,
unsigned long private, struct page *page,
int force, enum migrate_mode mode,
diff --git a/mm/mmap.c b/mm/mmap.c
index 59a4682ebf3f..8c7ca737a19b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2620,7 +2620,7 @@ static void unmap_region(struct mm_struct *mm,
* Create a list of vma's touched by the unmap, removing them from the mm's
* vma list as we go..
*/
-static void
+static bool
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, unsigned long end)
{
@@ -2645,6 +2645,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
/* Kill the cache */
vmacache_invalidate(mm);
+
+ /*
+ * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+ * VM_GROWSUP VMA. Such VMAs can change their size under
+ * down_read(mmap_lock) and collide with the VMA we are about to unmap.
+ */
+ if (vma && (vma->vm_flags & VM_GROWSDOWN))
+ return false;
+ if (prev && (prev->vm_flags & VM_GROWSUP))
+ return false;
+ return true;
}
/*
@@ -2825,7 +2836,8 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
}
/* Detach vmas from rbtree */
- detach_vmas_to_be_unmapped(mm, vma, prev, end);
+ if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
+ downgrade = false;
if (downgrade)
mmap_write_downgrade(mm);
diff --git a/mm/mremap.c b/mm/mremap.c
index 5dd572d57ca9..6b153dc05fe4 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -206,9 +206,28 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
/*
* The destination pmd shouldn't be established, free_pgtables()
- * should have release it.
+ * should have released it.
+ *
+ * However, there's a case during execve() where we use mremap
+ * to move the initial stack, and in that case the target area
+ * may overlap the source area (always moving down).
+ *
+ * If everything is PMD-aligned, that works fine, as moving
+ * each pmd down will clear the source pmd. But if we first
+ * have a few 4kB-only pages that get moved down, and then
+ * hit the "now the rest is PMD-aligned, let's do everything
+ * one pmd at a time", we will still have the old (now empty
+ * of any 4kB pages, but still there) PMD in the page table
+ * tree.
+ *
+ * Warn on it once - because we really should try to figure
+ * out how to do this better - but then say "I won't move
+ * this pmd".
+ *
+ * One alternative might be to just unmap the target pmd at
+ * this point, and verify that it really is empty. We'll see.
*/
- if (WARN_ON(!pmd_none(*new_pmd)))
+ if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
return false;
/*
diff --git a/mm/nommu.c b/mm/nommu.c
index cdcad5d61dd1..f32a69095d50 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -291,23 +291,6 @@ void *vzalloc_node(unsigned long size, int node)
EXPORT_SYMBOL(vzalloc_node);
/**
- * vmalloc_exec - allocate virtually contiguous, executable memory
- * @size: allocation size
- *
- * Kernel-internal function to allocate enough pages to cover @size
- * from the page level allocator and map them into contiguous and
- * executable kernel virtual space.
- *
- * For tight control over page level allocator and protection flags
- * use __vmalloc() instead.
- */
-
-void *vmalloc_exec(unsigned long size)
-{
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM);
-}
-
-/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
* @size: allocation size
*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 48eb0f1410d4..e028b87ce294 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7832,7 +7832,7 @@ void setup_per_zone_wmarks(void)
* Initialise min_free_kbytes.
*
* For small machines we want it small (128k min). For large machines
- * we want it large (64MB max). But it is not linear, because network
+ * we want it large (256MB max). But it is not linear, because network
* bandwidth does not increase linearly with machine size. We use
*
* min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy:
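
Plugging numbers into that formula shows the non-linearity; all values in kB, before the clamp to the 128k/256MB bounds named in the comment:

/*
 * lowmem      lowmem_kbytes    4 * sqrt(lowmem_kbytes)
 * 16 MB             16384        512 kB
 * 128 MB           131072      ~1448 kB
 * 1 GB            1048576       4096 kB
 * 16 GB          16777216      16384 kB (16 MB)
 * 4 TB         4294967296     262144 kB, i.e. right at the 256 MB cap
 */
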
diff --git a/mm/rodata_test.c b/mm/rodata_test.c
index 5e313fa93276..2a99df7beeb3 100644
--- a/mm/rodata_test.c
+++ b/mm/rodata_test.c
@@ -25,7 +25,7 @@ void rodata_test(void)
}
/* test 2: write to the variable; this should fault */
- if (!probe_kernel_write((void *)&rodata_test_data,
+ if (!copy_to_kernel_nofault((void *)&rodata_test_data,
(void *)&zero, sizeof(zero))) {
pr_err("test data was not read only\n");
return;
diff --git a/mm/shmem.c b/mm/shmem.c
index a0dbe62f8042..b2abca3f7f33 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3178,7 +3178,7 @@ static int shmem_initxattrs(struct inode *inode,
new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
GFP_KERNEL);
if (!new_xattr->name) {
- kfree(new_xattr);
+ kvfree(new_xattr);
return -ENOMEM;
}
diff --git a/mm/slab.h b/mm/slab.h
index 207c83ef6e06..74f7e09a7cfd 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -348,7 +348,7 @@ static __always_inline int memcg_charge_slab(struct page *page,
gfp_t gfp, int order,
struct kmem_cache *s)
{
- unsigned int nr_pages = 1 << order;
+ int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
int ret;
@@ -388,7 +388,7 @@ out:
static __always_inline void memcg_uncharge_slab(struct page *page, int order,
struct kmem_cache *s)
{
- unsigned int nr_pages = 1 << order;
+ int nr_pages = 1 << order;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 9e72ba224175..fe8b68482670 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -326,6 +326,14 @@ int slab_unmergeable(struct kmem_cache *s)
if (s->refcount < 0)
return 1;
+#ifdef CONFIG_MEMCG_KMEM
+ /*
+ * Skip the dying kmem_cache.
+ */
+ if (s->memcg_params.dying)
+ return 1;
+#endif
+
return 0;
}
@@ -886,12 +894,15 @@ static int shutdown_memcg_caches(struct kmem_cache *s)
return 0;
}
-static void flush_memcg_workqueue(struct kmem_cache *s)
+static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
{
spin_lock_irq(&memcg_kmem_wq_lock);
s->memcg_params.dying = true;
spin_unlock_irq(&memcg_kmem_wq_lock);
+}
+static void flush_memcg_workqueue(struct kmem_cache *s)
+{
/*
* SLAB and SLUB deactivate the kmem_caches through call_rcu. Make
* sure all registered rcu callbacks have been invoked.
@@ -923,10 +934,6 @@ static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
return 0;
}
-
-static inline void flush_memcg_workqueue(struct kmem_cache *s)
-{
-}
#endif /* CONFIG_MEMCG_KMEM */
void slab_kmem_cache_release(struct kmem_cache *s)
@@ -944,8 +951,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
if (unlikely(!s))
return;
- flush_memcg_workqueue(s);
-
get_online_cpus();
get_online_mems();
@@ -955,6 +960,22 @@ void kmem_cache_destroy(struct kmem_cache *s)
if (s->refcount)
goto out_unlock;
+#ifdef CONFIG_MEMCG_KMEM
+ memcg_set_kmem_cache_dying(s);
+
+ mutex_unlock(&slab_mutex);
+
+ put_online_mems();
+ put_online_cpus();
+
+ flush_memcg_workqueue(s);
+
+ get_online_cpus();
+ get_online_mems();
+
+ mutex_lock(&slab_mutex);
+#endif
+
err = shutdown_memcg_caches(s);
if (!err)
err = shutdown_cache(s);
@@ -1726,7 +1747,7 @@ void kzfree(const void *p)
if (unlikely(ZERO_OR_NULL_PTR(mem)))
return;
ks = ksize(mem);
- memset(mem, 0, ks);
+ memzero_explicit(mem, ks);
kfree(mem);
}
EXPORT_SYMBOL(kzfree);
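
The kzfree() change closes a dead-store-elimination hole: the buffer is freed immediately after being zeroed, so the compiler may legally drop a plain memset(), defeating the point of scrubbing sensitive data. memzero_explicit() pins the store; its lib/string.c implementation is essentially:

void memzero_explicit(void *s, size_t count)
{
	memset(s, 0, count);
	barrier_data(s);	/* tells the compiler @s is "used" here */
}
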
diff --git a/mm/slub.c b/mm/slub.c
index b8f798b50d44..ef303070d175 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -292,7 +292,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
return get_freepointer(s, object);
freepointer_addr = (unsigned long)object + s->offset;
- probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
+ copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
return freelist_ptr(s, p, freepointer_addr);
}
@@ -3766,15 +3766,13 @@ error:
}
static void list_slab_objects(struct kmem_cache *s, struct page *page,
- const char *text, unsigned long *map)
+ const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
+ unsigned long *map;
void *p;
- if (!map)
- return;
-
slab_err(s, page, text, s->name);
slab_lock(page);
@@ -3786,6 +3784,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
print_tracking(s, p);
}
}
+ put_map(map);
slab_unlock(page);
#endif
}
@@ -3799,11 +3798,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
struct page *page, *h;
- unsigned long *map = NULL;
-
-#ifdef CONFIG_SLUB_DEBUG
- map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
-#endif
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
@@ -3813,16 +3807,11 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
list_add(&page->slab_list, &discard);
} else {
list_slab_objects(s, page,
- "Objects remaining in %s on __kmem_cache_shutdown()",
- map);
+ "Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
-#ifdef CONFIG_SLUB_DEBUG
- bitmap_free(map);
-#endif
-
list_for_each_entry_safe(page, h, &discard, slab_list)
discard_slab(s, page);
}
diff --git a/mm/swap.c b/mm/swap.c
index dbcab84c6fce..a82efc33411f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -443,8 +443,7 @@ void mark_page_accessed(struct page *page)
else
__lru_cache_activate_page(page);
ClearPageReferenced(page);
- if (page_is_file_lru(page))
- workingset_activation(page);
+ workingset_activation(page);
}
if (page_is_idle(page))
clear_page_idle(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e98ff460e9e9..05889e8e3c97 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-
+#include "internal.h"
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -429,7 +429,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
__SetPageSwapBacked(page);
/* May fail (-ENOMEM) if XArray node allocation failed. */
- if (add_to_swap_cache(page, entry, gfp_mask & GFP_KERNEL)) {
+ if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) {
put_swap_page(page, entry);
goto fail_unlock;
}
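
Masking with GFP_KERNEL was the bug: GFP_KERNEL is only __GFP_RECLAIM | __GFP_IO | __GFP_FS, so flags such as __GFP_HIGH or __GFP_NOWARN passed by the caller were silently stripped. GFP_RECLAIM_MASK comes from mm/internal.h (hence the new include); from memory it is roughly:

#define GFP_RECLAIM_MASK (__GFP_RECLAIM | __GFP_HIGH | __GFP_IO | __GFP_FS | \
			  __GFP_NOWARN | __GFP_RETRY_MAYFAIL | __GFP_NOFAIL | \
			  __GFP_NORETRY | __GFP_MEMALLOC | __GFP_NOMEMALLOC)
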
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3091c2ca60df..5a2b55c8dd9a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1862,7 +1862,6 @@ EXPORT_SYMBOL(vm_unmap_ram);
* @pages: an array of pointers to the pages to be mapped
* @count: number of pages
* @node: prefer to allocate data structures on this node
- * @prot: memory protection to use. PAGE_KERNEL for regular RAM
*
* If you use this function for less than VMAP_MAX_ALLOC pages, it could be
* faster than vmap so it's good. But if you mix long-life and short-life
@@ -2696,26 +2695,6 @@ void *vzalloc_node(unsigned long size, int node)
}
EXPORT_SYMBOL(vzalloc_node);
-/**
- * vmalloc_exec - allocate virtually contiguous, executable memory
- * @size: allocation size
- *
- * Kernel-internal function to allocate enough pages to cover @size
- * from the page level allocator and map them into contiguous and
- * executable kernel virtual space.
- *
- * For tight control over page level allocator and protection flags
- * use __vmalloc() instead.
- *
- * Return: pointer to the allocated memory or %NULL on error
- */
-void *vmalloc_exec(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b6d84326bdf2..749d239c62b2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -904,6 +904,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
__delete_from_swap_cache(page, swap);
xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap);
+ workingset_eviction(page, target_memcg);
} else {
void (*freepage)(struct page *);
void *shadow = NULL;
@@ -1884,6 +1885,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
list_add(&page->lru, &pages_to_free);
} else {
nr_moved += nr_pages;
+ if (PageActive(page))
+ workingset_age_nonresident(lruvec, nr_pages);
}
}
diff --git a/mm/workingset.c b/mm/workingset.c
index d481ea452eeb..50b7937bab32 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -156,8 +156,8 @@
*
* Implementation
*
- * For each node's file LRU lists, a counter for inactive evictions
- * and activations is maintained (node->inactive_age).
+ * For each node's LRU lists, a counter for inactive evictions and
+ * activations is maintained (node->nonresident_age).
*
* On eviction, a snapshot of this counter (along with some bits to
* identify the node) is stored in the now empty page cache
@@ -213,7 +213,17 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
*workingsetp = workingset;
}
-static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
+/**
+ * workingset_age_nonresident - age non-resident entries as LRU ages
+ * @memcg: the lruvec that was aged
+ * @nr_pages: the number of pages to count
+ *
+ * As in-memory pages are aged, non-resident pages need to be aged as
+ * well, in order for the refault distances later on to be comparable
+ * to the in-memory dimensions. This function allows reclaim and LRU
+ * operations to drive the non-resident aging along in parallel.
+ */
+void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages)
{
/*
* Reclaiming a cgroup means reclaiming all its children in a
@@ -227,11 +237,8 @@ static void advance_inactive_age(struct mem_cgroup *memcg, pg_data_t *pgdat)
* the root cgroup's, age as well.
*/
do {
- struct lruvec *lruvec;
-
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- atomic_long_inc(&lruvec->inactive_age);
- } while (memcg && (memcg = parent_mem_cgroup(memcg)));
+ atomic_long_add(nr_pages, &lruvec->nonresident_age);
+ } while ((lruvec = parent_lruvec(lruvec)));
}
/**
@@ -254,12 +261,11 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
- advance_inactive_age(page_memcg(page), pgdat);
-
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
+ workingset_age_nonresident(lruvec, hpage_nr_pages(page));
/* XXX: target_memcg can be NULL, go through lruvec */
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
- eviction = atomic_long_read(&lruvec->inactive_age);
+ eviction = atomic_long_read(&lruvec->nonresident_age);
return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
}
@@ -309,20 +315,20 @@ void workingset_refault(struct page *page, void *shadow)
if (!mem_cgroup_disabled() && !eviction_memcg)
goto out;
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
- refault = atomic_long_read(&eviction_lruvec->inactive_age);
+ refault = atomic_long_read(&eviction_lruvec->nonresident_age);
/*
* Calculate the refault distance
*
* The unsigned subtraction here gives an accurate distance
- * across inactive_age overflows in most cases. There is a
+ * across nonresident_age overflows in most cases. There is a
* special case: usually, shadow entries have a short lifetime
* and are either refaulted or reclaimed along with the inode
* before they get too old. But it is not impossible for the
- * inactive_age to lap a shadow entry in the field, which can
- * then result in a false small refault distance, leading to a
- * false activation should this old entry actually refault
- * again. However, earlier kernels used to deactivate
+ * nonresident_age to lap a shadow entry in the field, which
+ * can then result in a false small refault distance, leading
+ * to a false activation should this old entry actually
+ * refault again. However, earlier kernels used to deactivate
* unconditionally with *every* reclaim invocation for the
* longest time, so the occasional inappropriate activation
* leading to pressure on the active list is not a problem.
@@ -359,7 +365,7 @@ void workingset_refault(struct page *page, void *shadow)
goto out;
SetPageActive(page);
- advance_inactive_age(memcg, pgdat);
+ workingset_age_nonresident(lruvec, hpage_nr_pages(page));
inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
/* Page was active prior to eviction */
@@ -382,6 +388,7 @@ out:
void workingset_activation(struct page *page)
{
struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
rcu_read_lock();
/*
@@ -394,7 +401,8 @@ void workingset_activation(struct page *page)
memcg = page_memcg_rcu(page);
if (!mem_cgroup_disabled() && !memcg)
goto out;
- advance_inactive_age(memcg, page_pgdat(page));
+ lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+ workingset_age_nonresident(lruvec, hpage_nr_pages(page));
out:
rcu_read_unlock();
}
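
The renamed counter drives the refault-distance test the comments above describe: eviction snapshots nonresident_age, refault reads it again, and the unsigned difference is the distance even across counter wraparound. The arithmetic reduced to its core, as a sketch with workingset_size standing in for the eligible LRU size:

	unsigned long eviction, refault, distance;

	eviction = atomic_long_read(&lruvec->nonresident_age); /* at evict */
	/* ... page lives on as a shadow entry in the page cache ... */
	refault = atomic_long_read(&lruvec->nonresident_age);  /* at fault */

	/* Unsigned wrap keeps this correct across one counter overflow. */
	distance = refault - eviction;
	if (distance <= workingset_size)
		SetPageActive(page);	/* small distance: worth keeping */
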