summaryrefslogtreecommitdiff
path: root/mm/mlock.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mlock.c')
-rw-r--r--mm/mlock.c110
1 files changed, 76 insertions, 34 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 0a0c996c5c21..2f699c3497a5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -206,8 +206,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch)
if (lruvec)
unlock_page_lruvec_irq(lruvec);
- folios_put(fbatch->folios, folio_batch_count(fbatch));
- folio_batch_reinit(fbatch);
+ folios_put(fbatch);
}
void mlock_drain_local(void)
@@ -256,7 +255,7 @@ void mlock_folio(struct folio *folio)
folio_get(folio);
if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
@@ -279,7 +278,7 @@ void mlock_new_folio(struct folio *folio)
folio_get(folio);
if (!folio_batch_add(fbatch, mlock_new(folio)) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
@@ -300,11 +299,54 @@ void munlock_folio(struct folio *folio)
*/
folio_get(folio);
if (!folio_batch_add(fbatch, folio) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
+static inline unsigned int folio_mlock_step(struct folio *folio,
+ pte_t *pte, unsigned long addr, unsigned long end)
+{
+ unsigned int count = (end - addr) >> PAGE_SHIFT;
+ pte_t ptent = ptep_get(pte);
+
+ if (!folio_test_large(folio))
+ return 1;
+
+ return folio_pte_batch(folio, pte, ptent, count);
+}
+
+static inline bool allow_mlock_munlock(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned int step)
+{
+ /*
+ * For unlock, allow munlock large folio which is partially
+ * mapped to VMA. As it's possible that large folio is
+ * mlocked and VMA is split later.
+ *
+ * During memory pressure, such kind of large folio can
+ * be split. And the pages are not in VM_LOCKed VMA
+ * can be reclaimed.
+ */
+ if (!(vma->vm_flags & VM_LOCKED))
+ return true;
+
+ /* folio_within_range() cannot take KSM, but any small folio is OK */
+ if (!folio_test_large(folio))
+ return true;
+
+ /* folio not in range [start, end), skip mlock */
+ if (!folio_within_range(folio, vma, start, end))
+ return false;
+
+ /* folio is not fully mapped, skip mlock */
+ if (step != folio_nr_pages(folio))
+ return false;
+
+ return true;
+}
+
static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
@@ -314,6 +356,8 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
pte_t *start_pte, *pte;
pte_t ptent;
struct folio *folio;
+ unsigned int step = 1;
+ unsigned long start = addr;
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
@@ -321,7 +365,9 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
goto out;
if (is_huge_zero_pmd(*pmd))
goto out;
- folio = page_folio(pmd_page(*pmd));
+ folio = pmd_folio(*pmd);
+ if (folio_is_zone_device(folio))
+ goto out;
if (vma->vm_flags & VM_LOCKED)
mlock_folio(folio);
else
@@ -334,6 +380,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
walk->action = ACTION_AGAIN;
return 0;
}
+
for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
ptent = ptep_get(pte);
if (!pte_present(ptent))
@@ -341,12 +388,19 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
folio = vm_normal_folio(vma, addr, ptent);
if (!folio || folio_is_zone_device(folio))
continue;
- if (folio_test_large(folio))
- continue;
+
+ step = folio_mlock_step(folio, pte, addr, end);
+ if (!allow_mlock_munlock(folio, vma, start, end, step))
+ goto next_entry;
+
if (vma->vm_flags & VM_LOCKED)
mlock_folio(folio);
else
munlock_folio(folio);
+
+next_entry:
+ pte += step - 1;
+ addr += (step - 1) << PAGE_SHIFT;
}
pte_unmap(start_pte);
out:
@@ -371,6 +425,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
{
static const struct mm_walk_ops mlock_walk_ops = {
.pmd_entry = mlock_pte_range,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
};
/*
@@ -386,6 +441,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
*/
if (newflags & VM_LOCKED)
newflags |= VM_IO;
+ vma_start_write(vma);
vm_flags_reset_once(vma, newflags);
lru_add_drain();
@@ -412,39 +468,22 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long end, vm_flags_t newflags)
{
struct mm_struct *mm = vma->vm_mm;
- pgoff_t pgoff;
int nr_pages;
int ret = 0;
vm_flags_t oldflags = vma->vm_flags;
if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
- vma_is_dax(vma) || vma_is_secretmem(vma))
+ vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE))
/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
goto out;
- pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- *prev = vma_merge(vmi, mm, *prev, start, end, newflags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx, anon_vma_name(vma));
- if (*prev) {
- vma = *prev;
- goto success;
- }
-
- if (start != vma->vm_start) {
- ret = split_vma(vmi, vma, start, 1);
- if (ret)
- goto out;
- }
-
- if (end != vma->vm_end) {
- ret = split_vma(vmi, vma, end, 0);
- if (ret)
- goto out;
+ vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out;
}
-success:
/*
* Keep track of amount of locked VM.
*/
@@ -460,9 +499,9 @@ success:
* It's okay if try_to_unmap_one unmaps a page just after we
* set VM_LOCKED, populate_vma_page_range will bring it back.
*/
-
if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
/* No work to do, and mlocking twice would be wrong */
+ vma_start_write(vma);
vm_flags_reset(vma, newflags);
} else {
mlock_vma_pages_range(vma, start, end, newflags);
@@ -686,14 +725,17 @@ static int apply_mlockall_flags(int flags)
}
for_each_vma(vmi, vma) {
+ int error;
vm_flags_t newflags;
newflags = vma->vm_flags & ~VM_LOCKED_MASK;
newflags |= to_add;
- /* Ignore errors */
- mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
- newflags);
+ error = mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
+ newflags);
+ /* Ignore errors, but prev needs fixing up. */
+ if (error)
+ prev = vma;
cond_resched();
}
out: