summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/mm/fault.c52
-rw-r--r--include/linux/mm.h2
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/memory.c121
5 files changed, 130 insertions, 50 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53bab123a8ee..cb1031018afa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -276,6 +276,7 @@ config X86
select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
+ select LOCK_MM_AND_FIND_VMA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c..e8711b2cafaf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -880,12 +880,6 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}
-static noinline void
-bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
-{
- __bad_area(regs, error_code, address, 0, SEGV_MAPERR);
-}
-
static inline bool bad_area_access_from_pkeys(unsigned long error_code,
struct vm_area_struct *vma)
{
@@ -1366,51 +1360,10 @@ void do_user_addr_fault(struct pt_regs *regs,
lock_mmap:
#endif /* CONFIG_PER_VMA_LOCK */
- /*
- * Kernel-mode access to the user address space should only occur
- * on well-defined single instructions listed in the exception
- * tables. But, an erroneous kernel fault occurring outside one of
- * those areas which also holds mmap_lock might deadlock attempting
- * to validate the fault against the address space.
- *
- * Only do the expensive exception table search when we might be at
- * risk of a deadlock. This happens if we
- * 1. Failed to acquire mmap_lock, and
- * 2. The access did not originate in userspace.
- */
- if (unlikely(!mmap_read_trylock(mm))) {
- if (!user_mode(regs) && !search_exception_tables(regs->ip)) {
- /*
- * Fault from code in kernel from
- * which we do not expect faults.
- */
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
- return;
- }
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
- return;
- }
- if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
+ bad_area_nosemaphore(regs, error_code, address);
return;
}
@@ -1418,7 +1371,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address, vma);
return;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27ce77080c79..570cf906fbcc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2325,6 +2325,8 @@ void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long address, struct pt_regs *regs);
#else
static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
diff --git a/mm/Kconfig b/mm/Kconfig
index 7672a22647b4..e3454087fd31 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1206,6 +1206,10 @@ config PER_VMA_LOCK
This feature allows locking each virtual memory area separately when
handling page faults instead of taking mmap_lock.
+config LOCK_MM_AND_FIND_VMA
+ bool
+ depends on !STACK_GROWSUP
+
source "mm/damon/Kconfig"
endmenu
diff --git a/mm/memory.c b/mm/memory.c
index f69fbc251198..1a427097b71f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5262,6 +5262,127 @@ out:
}
EXPORT_SYMBOL_GPL(handle_mm_fault);
+#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
+#include <linux/extable.h>
+
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+ /* Even if this succeeds, make it clear we *might* have slept */
+ if (likely(mmap_read_trylock(mm))) {
+ might_sleep();
+ return true;
+ }
+
+ if (regs && !user_mode(regs)) {
+ unsigned long ip = instruction_pointer(regs);
+ if (!search_exception_tables(ip))
+ return false;
+ }
+
+ mmap_read_lock(mm);
+ return true;
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+ * It should be easy enough to do: it's basically a
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+ mmap_read_unlock(mm);
+ if (regs && !user_mode(regs)) {
+ unsigned long ip = instruction_pointer(regs);
+ if (!search_exception_tables(ip))
+ return false;
+ }
+ mmap_write_lock(mm);
+ return true;
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalend to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, struct pt_regs *regs)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, regs))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, regs))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
+#endif
+
#ifdef CONFIG_PER_VMA_LOCK
/*
* Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be