summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
authorLorenzo Stoakes <lorenzo.stoakes@oracle.com>2025-04-16 11:38:36 +0100
committerAndrew Morton <akpm@linux-foundation.org>2025-05-11 17:48:33 -0700
commit75404e07663b1622948944cf31531fa87cb1785d (patch)
treec367325f4857eae6927e34d4e4a0ea84c9b52f1d /mm/memory.c
parentf735eebe55f8f61758fe014bd0b02ab50b059e4d (diff)
mm: move mmap/vma locking logic into specific files
Currently the VMA and mmap locking logic is entangled in two of the most overwrought files in mm - include/linux/mm.h and mm/memory.c. Separate this logic out so we can more easily make changes and create an appropriate MAINTAINERS entry that spans only the logic relating to locking. This should have no functional change. Care is taken to avoid dependency loops, we must regrettably keep release_fault_lock() and assert_fault_locked() in mm.h as a result due to the dependence on the vm_fault type. Additionally we must declare rcuwait_wake_up() manually to avoid a dependency cycle on linux/rcuwait.h. Additionally move the nommu implementatino of lock_mm_and_find_vma() to mmap_lock.c so everything lock-related is in one place. Link: https://lkml.kernel.org/r/bec6c8e29fa8de9267a811a10b1bdae355d67ed4.1744799282.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: "Paul E . McKenney" <paulmck@kernel.org> Cc: SeongJae Park <sj@kernel.org> Cc: Shakeel Butt <shakeel.butt@linux.dev> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c252
1 files changed, 0 insertions, 252 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 71c255f3fdcc..f18266b5a0a9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6378,258 +6378,6 @@ out:
}
EXPORT_SYMBOL_GPL(handle_mm_fault);
-#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
-#include <linux/extable.h>
-
-static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
-{
- if (likely(mmap_read_trylock(mm)))
- return true;
-
- if (regs && !user_mode(regs)) {
- unsigned long ip = exception_ip(regs);
- if (!search_exception_tables(ip))
- return false;
- }
-
- return !mmap_read_lock_killable(mm);
-}
-
-static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
-{
- /*
- * We don't have this operation yet.
- *
- * It should be easy enough to do: it's basically a
- * atomic_long_try_cmpxchg_acquire()
- * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
- * it also needs the proper lockdep magic etc.
- */
- return false;
-}
-
-static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
-{
- mmap_read_unlock(mm);
- if (regs && !user_mode(regs)) {
- unsigned long ip = exception_ip(regs);
- if (!search_exception_tables(ip))
- return false;
- }
- return !mmap_write_lock_killable(mm);
-}
-
-/*
- * Helper for page fault handling.
- *
- * This is kind of equivalent to "mmap_read_lock()" followed
- * by "find_extend_vma()", except it's a lot more careful about
- * the locking (and will drop the lock on failure).
- *
- * For example, if we have a kernel bug that causes a page
- * fault, we don't want to just use mmap_read_lock() to get
- * the mm lock, because that would deadlock if the bug were
- * to happen while we're holding the mm lock for writing.
- *
- * So this checks the exception tables on kernel faults in
- * order to only do this all for instructions that are actually
- * expected to fault.
- *
- * We can also actually take the mm lock for writing if we
- * need to extend the vma, which helps the VM layer a lot.
- */
-struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
- unsigned long addr, struct pt_regs *regs)
-{
- struct vm_area_struct *vma;
-
- if (!get_mmap_lock_carefully(mm, regs))
- return NULL;
-
- vma = find_vma(mm, addr);
- if (likely(vma && (vma->vm_start <= addr)))
- return vma;
-
- /*
- * Well, dang. We might still be successful, but only
- * if we can extend a vma to do so.
- */
- if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
- mmap_read_unlock(mm);
- return NULL;
- }
-
- /*
- * We can try to upgrade the mmap lock atomically,
- * in which case we can continue to use the vma
- * we already looked up.
- *
- * Otherwise we'll have to drop the mmap lock and
- * re-take it, and also look up the vma again,
- * re-checking it.
- */
- if (!mmap_upgrade_trylock(mm)) {
- if (!upgrade_mmap_lock_carefully(mm, regs))
- return NULL;
-
- vma = find_vma(mm, addr);
- if (!vma)
- goto fail;
- if (vma->vm_start <= addr)
- goto success;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto fail;
- }
-
- if (expand_stack_locked(vma, addr))
- goto fail;
-
-success:
- mmap_write_downgrade(mm);
- return vma;
-
-fail:
- mmap_write_unlock(mm);
- return NULL;
-}
-#endif
-
-#ifdef CONFIG_PER_VMA_LOCK
-static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching)
-{
- unsigned int tgt_refcnt = VMA_LOCK_OFFSET;
-
- /* Additional refcnt if the vma is attached. */
- if (!detaching)
- tgt_refcnt++;
-
- /*
- * If vma is detached then only vma_mark_attached() can raise the
- * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached().
- */
- if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt))
- return false;
-
- rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_);
- rcuwait_wait_event(&vma->vm_mm->vma_writer_wait,
- refcount_read(&vma->vm_refcnt) == tgt_refcnt,
- TASK_UNINTERRUPTIBLE);
- lock_acquired(&vma->vmlock_dep_map, _RET_IP_);
-
- return true;
-}
-
-static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached)
-{
- *detached = refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt);
- rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
-}
-
-void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq)
-{
- bool locked;
-
- /*
- * __vma_enter_locked() returns false immediately if the vma is not
- * attached, otherwise it waits until refcnt is indicating that vma
- * is attached with no readers.
- */
- locked = __vma_enter_locked(vma, false);
-
- /*
- * We should use WRITE_ONCE() here because we can have concurrent reads
- * from the early lockless pessimistic check in vma_start_read().
- * We don't really care about the correctness of that early check, but
- * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
- */
- WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
-
- if (locked) {
- bool detached;
-
- __vma_exit_locked(vma, &detached);
- WARN_ON_ONCE(detached); /* vma should remain attached */
- }
-}
-EXPORT_SYMBOL_GPL(__vma_start_write);
-
-void vma_mark_detached(struct vm_area_struct *vma)
-{
- vma_assert_write_locked(vma);
- vma_assert_attached(vma);
-
- /*
- * We are the only writer, so no need to use vma_refcount_put().
- * The condition below is unlikely because the vma has been already
- * write-locked and readers can increment vm_refcnt only temporarily
- * before they check vm_lock_seq, realize the vma is locked and drop
- * back the vm_refcnt. That is a narrow window for observing a raised
- * vm_refcnt.
- */
- if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
- /* Wait until vma is detached with no readers. */
- if (__vma_enter_locked(vma, true)) {
- bool detached;
-
- __vma_exit_locked(vma, &detached);
- WARN_ON_ONCE(!detached);
- }
- }
-}
-
-/*
- * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
- * stable and not isolated. If the VMA is not found or is being modified the
- * function returns NULL.
- */
-struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
- unsigned long address)
-{
- MA_STATE(mas, &mm->mm_mt, address, address);
- struct vm_area_struct *vma;
-
- rcu_read_lock();
-retry:
- vma = mas_walk(&mas);
- if (!vma)
- goto inval;
-
- vma = vma_start_read(mm, vma);
- if (IS_ERR_OR_NULL(vma)) {
- /* Check if the VMA got isolated after we found it */
- if (PTR_ERR(vma) == -EAGAIN) {
- count_vm_vma_lock_event(VMA_LOCK_MISS);
- /* The area was replaced with another one */
- goto retry;
- }
-
- /* Failed to lock the VMA */
- goto inval;
- }
- /*
- * At this point, we have a stable reference to a VMA: The VMA is
- * locked and we know it hasn't already been isolated.
- * From here on, we can access the VMA without worrying about which
- * fields are accessible for RCU readers.
- */
-
- /* Check if the vma we locked is the right one. */
- if (unlikely(vma->vm_mm != mm ||
- address < vma->vm_start || address >= vma->vm_end))
- goto inval_end_read;
-
- rcu_read_unlock();
- return vma;
-
-inval_end_read:
- vma_end_read(vma);
-inval:
- rcu_read_unlock();
- count_vm_vma_lock_event(VMA_LOCK_ABORT);
- return NULL;
-}
-#endif /* CONFIG_PER_VMA_LOCK */
-
#ifndef __PAGETABLE_P4D_FOLDED
/*
* Allocate p4d page table.