-rw-r--r--  mm/internal.h      9
-rw-r--r--  mm/mlock.c       219
-rw-r--r--  mm/swap.c         27
3 files changed, 208 insertions, 47 deletions
diff --git a/mm/internal.h b/mm/internal.h
index b3f0dd3ffba2..18af980bb1b8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -402,7 +402,8 @@ extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
*
* mlock is usually called at the end of page_add_*_rmap(),
* munlock at the end of page_remove_rmap(); but new anon
- * pages are managed in lru_cache_add_inactive_or_unevictable().
+ * pages are managed by lru_cache_add_inactive_or_unevictable()
+ * calling mlock_new_page().
*
* @compound is used to include pmd mappings of THPs, but filter out
* pte mappings of THPs, which cannot be consistently counted: a pte
@@ -425,6 +426,9 @@ static inline void munlock_vma_page(struct page *page,
(compound || !PageTransCompound(page)))
munlock_page(page);
}
+void mlock_new_page(struct page *page);
+bool need_mlock_page_drain(int cpu);
+void mlock_page_drain(int cpu);
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
@@ -503,6 +507,9 @@ static inline void mlock_vma_page(struct page *page,
struct vm_area_struct *vma, bool compound) { }
static inline void munlock_vma_page(struct page *page,
struct vm_area_struct *vma, bool compound) { }
+static inline void mlock_new_page(struct page *page) { }
+static inline bool need_mlock_page_drain(int cpu) { return false; }
+static inline void mlock_page_drain(int cpu) { }
static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
{
}
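
The empty static inline stubs in the second hunk follow the usual kernel pattern for configured-out features: real declarations on one side of the #ifdef, do-nothing inlines on the other, so call sites need no conditional compilation and the calls vanish at compile time. Below is a minimal stand-alone sketch of that pattern, not part of the patch; CONFIG_FEATURE, feature_drain() and need_feature_drain() are made-up names for illustration.

/* Illustrative stub pattern; CONFIG_FEATURE and these functions are made up. */
#include <stdbool.h>
#include <stdio.h>

#ifdef CONFIG_FEATURE
void feature_drain(int cpu);		/* real implementation elsewhere */
bool need_feature_drain(int cpu);
#else
/* Feature configured out: empty stubs keep callers free of #ifdefs. */
static inline void feature_drain(int cpu) { }
static inline bool need_feature_drain(int cpu) { return false; }
#endif

int main(void)
{
	/* The call site is identical whether or not the feature is built in. */
	if (need_feature_drain(0))
		feature_drain(0);
	printf("drain needed on cpu 0: %d\n", need_feature_drain(0));
	return 0;
}
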
diff --git a/mm/mlock.c b/mm/mlock.c
index 581ea8bf1b83..d28e56529e5b 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -28,6 +28,8 @@
#include "internal.h"
+static DEFINE_PER_CPU(struct pagevec, mlock_pvec);
+
bool can_do_mlock(void)
{
if (rlimit(RLIMIT_MEMLOCK) != 0)
@@ -49,57 +51,79 @@ EXPORT_SYMBOL(can_do_mlock);
* PageUnevictable is set to indicate the unevictable state.
*/
-/**
- * mlock_page - mlock a page
- * @page: page to be mlocked, either a normal page or a THP head.
- */
-void mlock_page(struct page *page)
+static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec)
{
- struct lruvec *lruvec;
- int nr_pages = thp_nr_pages(page);
+ /* There is nothing more we can do while it's off LRU */
+ if (!TestClearPageLRU(page))
+ return lruvec;
- VM_BUG_ON_PAGE(PageTail(page), page);
+ lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
- if (!TestSetPageMlocked(page)) {
- mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
- __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
+ if (unlikely(page_evictable(page))) {
+ /*
+ * This is a little surprising, but quite possible:
+ * PageMlocked must have got cleared already by another CPU.
+ * Could this page be on the Unevictable LRU? I'm not sure,
+ * but move it now if so.
+ */
+ if (PageUnevictable(page)) {
+ del_page_from_lru_list(page, lruvec);
+ ClearPageUnevictable(page);
+ add_page_to_lru_list(page, lruvec);
+ __count_vm_events(UNEVICTABLE_PGRESCUED,
+ thp_nr_pages(page));
+ }
+ goto out;
}
- /* There is nothing more we can do while it's off LRU */
- if (!TestClearPageLRU(page))
- return;
-
- lruvec = folio_lruvec_lock_irq(page_folio(page));
if (PageUnevictable(page)) {
- page->mlock_count++;
+ if (PageMlocked(page))
+ page->mlock_count++;
goto out;
}
del_page_from_lru_list(page, lruvec);
ClearPageActive(page);
SetPageUnevictable(page);
- page->mlock_count = 1;
+ page->mlock_count = !!PageMlocked(page);
add_page_to_lru_list(page, lruvec);
- __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
+ __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
SetPageLRU(page);
- unlock_page_lruvec_irq(lruvec);
+ return lruvec;
}
-/**
- * munlock_page - munlock a page
- * @page: page to be munlocked, either a normal page or a THP head.
- */
-void munlock_page(struct page *page)
+static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec)
+{
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+
+ lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
+
+ /* As above, this is a little surprising, but possible */
+ if (unlikely(page_evictable(page)))
+ goto out;
+
+ SetPageUnevictable(page);
+ page->mlock_count = !!PageMlocked(page);
+ __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
+out:
+ add_page_to_lru_list(page, lruvec);
+ SetPageLRU(page);
+ return lruvec;
+}
+
+static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec)
{
- struct lruvec *lruvec;
int nr_pages = thp_nr_pages(page);
+ bool isolated = false;
+
+ if (!TestClearPageLRU(page))
+ goto munlock;
- VM_BUG_ON_PAGE(PageTail(page), page);
+ isolated = true;
+ lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);
- lock_page_memcg(page);
- lruvec = folio_lruvec_lock_irq(page_folio(page));
- if (PageLRU(page) && PageUnevictable(page)) {
+ if (PageUnevictable(page)) {
/* Then mlock_count is maintained, but might undercount */
if (page->mlock_count)
page->mlock_count--;
@@ -108,24 +132,151 @@ void munlock_page(struct page *page)
}
/* else assume that was the last mlock: reclaim will fix it if not */
+munlock:
if (TestClearPageMlocked(page)) {
__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
- if (PageLRU(page) || !PageUnevictable(page))
+ if (isolated || !PageUnevictable(page))
__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
else
__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
}
/* page_evictable() has to be checked *after* clearing Mlocked */
- if (PageLRU(page) && PageUnevictable(page) && page_evictable(page)) {
+ if (isolated && PageUnevictable(page) && page_evictable(page)) {
del_page_from_lru_list(page, lruvec);
ClearPageUnevictable(page);
add_page_to_lru_list(page, lruvec);
__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
}
out:
- unlock_page_lruvec_irq(lruvec);
- unlock_page_memcg(page);
+ if (isolated)
+ SetPageLRU(page);
+ return lruvec;
+}
+
+/*
+ * Flags held in the low bits of a struct page pointer on the mlock_pvec.
+ */
+#define LRU_PAGE 0x1
+#define NEW_PAGE 0x2
+static inline struct page *mlock_lru(struct page *page)
+{
+ return (struct page *)((unsigned long)page + LRU_PAGE);
+}
+
+static inline struct page *mlock_new(struct page *page)
+{
+ return (struct page *)((unsigned long)page + NEW_PAGE);
+}
+
+/*
+ * mlock_pagevec() is derived from pagevec_lru_move_fn():
+ * perhaps that can make use of such page pointer flags in future,
+ * but for now just keep it for mlock. We could use three separate
+ * pagevecs instead, but one feels better (munlocking a full pagevec
+ * does not need to drain mlocking pagevecs first).
+ */
+static void mlock_pagevec(struct pagevec *pvec)
+{
+ struct lruvec *lruvec = NULL;
+ unsigned long mlock;
+ struct page *page;
+ int i;
+
+ for (i = 0; i < pagevec_count(pvec); i++) {
+ page = pvec->pages[i];
+ mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
+ page = (struct page *)((unsigned long)page - mlock);
+ pvec->pages[i] = page;
+
+ if (mlock & LRU_PAGE)
+ lruvec = __mlock_page(page, lruvec);
+ else if (mlock & NEW_PAGE)
+ lruvec = __mlock_new_page(page, lruvec);
+ else
+ lruvec = __munlock_page(page, lruvec);
+ }
+
+ if (lruvec)
+ unlock_page_lruvec_irq(lruvec);
+ release_pages(pvec->pages, pvec->nr);
+ pagevec_reinit(pvec);
+}
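
LRU_PAGE and NEW_PAGE travel in the two low bits of each struct page pointer held in the pagevec: struct page is always word-aligned, so those bits are otherwise zero, and mlock_pagevec() masks them off to choose between __mlock_page(), __mlock_new_page() and __munlock_page(). Below is a stand-alone user-space sketch of the same low-bit tagging trick, not part of the patch; struct item, tag_ptr() and dispatch() are illustrative names.

/* Stand-alone illustration of low-bit pointer tagging; names are made up. */
#include <stdint.h>
#include <stdio.h>

#define TAG_LRU		0x1UL		/* plays the role of LRU_PAGE */
#define TAG_NEW		0x2UL		/* plays the role of NEW_PAGE */
#define TAG_MASK	(TAG_LRU | TAG_NEW)

struct item {				/* stand-in for struct page */
	int id;
};

/* Objects are at least 4-byte aligned, so the two low bits are free. */
static void *tag_ptr(struct item *p, unsigned long tag)
{
	return (void *)((uintptr_t)p | tag);
}

/* Strip the tag and branch on it, as mlock_pagevec() does. */
static void dispatch(void *tagged)
{
	unsigned long tag = (uintptr_t)tagged & TAG_MASK;
	struct item *p = (struct item *)((uintptr_t)tagged & ~TAG_MASK);

	if (tag & TAG_LRU)
		printf("item %d: mlock, was on the LRU\n", p->id);
	else if (tag & TAG_NEW)
		printf("item %d: mlock, newly allocated\n", p->id);
	else
		printf("item %d: munlock\n", p->id);
}

int main(void)
{
	struct item a = { 1 }, b = { 2 }, c = { 3 };

	dispatch(tag_ptr(&a, TAG_LRU));
	dispatch(tag_ptr(&b, TAG_NEW));
	dispatch(tag_ptr(&c, 0));
	return 0;
}
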
+
+void mlock_page_drain(int cpu)
+{
+ struct pagevec *pvec;
+
+ pvec = &per_cpu(mlock_pvec, cpu);
+ if (pagevec_count(pvec))
+ mlock_pagevec(pvec);
+}
+
+bool need_mlock_page_drain(int cpu)
+{
+ return pagevec_count(&per_cpu(mlock_pvec, cpu));
+}
+
+/**
+ * mlock_page - mlock a page already on (or temporarily off) LRU
+ * @page: page to be mlocked, either a normal page or a THP head.
+ */
+void mlock_page(struct page *page)
+{
+ struct pagevec *pvec = &get_cpu_var(mlock_pvec);
+
+ if (!TestSetPageMlocked(page)) {
+ int nr_pages = thp_nr_pages(page);
+
+ mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+ __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
+ }
+
+ get_page(page);
+ if (!pagevec_add(pvec, mlock_lru(page)) ||
+ PageHead(page) || lru_cache_disabled())
+ mlock_pagevec(pvec);
+ put_cpu_var(mlock_pvec);
+}
+
+/**
+ * mlock_new_page - mlock a newly allocated page not yet on LRU
+ * @page: page to be mlocked, either a normal page or a THP head.
+ */
+void mlock_new_page(struct page *page)
+{
+ struct pagevec *pvec = &get_cpu_var(mlock_pvec);
+ int nr_pages = thp_nr_pages(page);
+
+ SetPageMlocked(page);
+ mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+ __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
+
+ get_page(page);
+ if (!pagevec_add(pvec, mlock_new(page)) ||
+ PageHead(page) || lru_cache_disabled())
+ mlock_pagevec(pvec);
+ put_cpu_var(mlock_pvec);
+}
+
+/**
+ * munlock_page - munlock a page
+ * @page: page to be munlocked, either a normal page or a THP head.
+ */
+void munlock_page(struct page *page)
+{
+ struct pagevec *pvec = &get_cpu_var(mlock_pvec);
+
+ /*
+ * TestClearPageMlocked(page) must be left to __munlock_page(),
+ * which will check whether the page is multiply mlocked.
+ */
+
+ get_page(page);
+ if (!pagevec_add(pvec, page) ||
+ PageHead(page) || lru_cache_disabled())
+ mlock_pagevec(pvec);
+ put_cpu_var(mlock_pvec);
}
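
mlock_page(), mlock_new_page() and munlock_page() share one shape: take a page reference, append the (tagged) page to the per-CPU pagevec, and process the whole batch at once when the vector fills, when the page is a THP head, or when the LRU cache is disabled. Here is a stand-alone sketch of that collect-then-flush pattern, not part of the patch; the batch size and all names are illustrative (the kernel's pagevec holds PAGEVEC_SIZE entries).

/* Illustrative batch-and-flush pattern; BATCH_SIZE and names are made up. */
#include <stdbool.h>
#include <stdio.h>

#define BATCH_SIZE 15

struct batch {
	int nr;
	int items[BATCH_SIZE];
};

/* Process everything collected so far, then reset, like mlock_pagevec(). */
static void batch_flush(struct batch *b)
{
	for (int i = 0; i < b->nr; i++)
		printf("processing item %d\n", b->items[i]);
	b->nr = 0;
}

/* Returns false once the batch is full, mirroring pagevec_add(). */
static bool batch_add(struct batch *b, int item)
{
	b->items[b->nr++] = item;
	return b->nr < BATCH_SIZE;
}

/* Queue one item; flush when full or when the caller insists. */
static void queue_item(struct batch *b, int item, bool force_flush)
{
	if (!batch_add(b, item) || force_flush)
		batch_flush(b);
}

int main(void)
{
	struct batch b = { 0 };

	for (int i = 0; i < 40; i++)
		queue_item(&b, i, false);
	batch_flush(&b);	/* final drain, like mlock_page_drain() */
	return 0;
}
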
static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
diff --git a/mm/swap.c b/mm/swap.c
index 3f770b1ea2c1..842d5cd92cf6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -490,18 +490,12 @@ EXPORT_SYMBOL(folio_add_lru);
void lru_cache_add_inactive_or_unevictable(struct page *page,
struct vm_area_struct *vma)
{
- bool unevictable;
-
VM_BUG_ON_PAGE(PageLRU(page), page);
- unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
- if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
- int nr_pages = thp_nr_pages(page);
-
- mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
- count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
- }
- lru_cache_add(page);
+ if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED))
+ mlock_new_page(page);
+ else
+ lru_cache_add(page);
}
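
The new test (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED is true only when the VMA is mlocked and has none of the VM_SPECIAL bits set. Below is a stand-alone sketch of that mask-and-compare idiom, not part of the patch; the flag values are illustrative, not the kernel's real VM_* definitions.

/* Illustrative mask-and-compare idiom; these flag values are made up. */
#include <stdbool.h>
#include <stdio.h>

#define FLAG_LOCKED	0x1UL
#define FLAG_IO		0x2UL	/* stand-ins for the bits grouped as VM_SPECIAL */
#define FLAG_PFNMAP	0x4UL
#define FLAG_SPECIAL	(FLAG_IO | FLAG_PFNMAP)

/* True only when LOCKED is set and no SPECIAL bit is set. */
static bool locked_and_not_special(unsigned long flags)
{
	return (flags & (FLAG_LOCKED | FLAG_SPECIAL)) == FLAG_LOCKED;
}

int main(void)
{
	printf("%d\n", locked_and_not_special(FLAG_LOCKED));		/* 1 */
	printf("%d\n", locked_and_not_special(FLAG_LOCKED | FLAG_IO));	/* 0 */
	printf("%d\n", locked_and_not_special(FLAG_PFNMAP));		/* 0 */
	return 0;
}
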
/*
@@ -640,6 +634,7 @@ void lru_add_drain_cpu(int cpu)
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
activate_page_drain(cpu);
+ mlock_page_drain(cpu);
}
/**
@@ -842,6 +837,7 @@ inline void __lru_add_drain_all(bool force_all_cpus)
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
need_activate_page_drain(cpu) ||
+ need_mlock_page_drain(cpu) ||
has_bh_in_lru(cpu, NULL)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
@@ -1030,7 +1026,7 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
* Is an smp_mb__after_atomic() still required here, before
* folio_evictable() tests PageMlocked, to rule out the possibility
* of stranding an evictable folio on an unevictable LRU? I think
- * not, because munlock_page() only clears PageMlocked while the LRU
+ * not, because __munlock_page() only clears PageMlocked while the LRU
* lock is held.
*
* (That is not true of __page_cache_release(), and not necessarily
@@ -1043,7 +1039,14 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
} else {
folio_clear_active(folio);
folio_set_unevictable(folio);
- folio->mlock_count = !!folio_test_mlocked(folio);
+ /*
+ * folio->mlock_count = !!folio_test_mlocked(folio)?
+ * But that leaves __mlock_page() in doubt whether another
+ * actor has already counted the mlock or not. Err on the
+ * safe side, underestimate, let page reclaim fix it, rather
+ * than leaving a page on the unevictable LRU indefinitely.
+ */
+ folio->mlock_count = 0;
if (!was_unevictable)
__count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
}