summaryrefslogtreecommitdiff
path: root/arch/x86/mm/fault.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-15 15:17:36 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-24 14:12:54 -0700
commitc2508ec5a58db67093f4fb8bf89a9a7c53a109e9 (patch)
tree8595d666292abba0d7c460191819fa6caa3d44bf /arch/x86/mm/fault.c
parent45a3e24f65e90a047bef86f927ebdc4c710edaa1 (diff)
mm: introduce new 'lock_mm_and_find_vma()' page fault helper
.. and make x86 use it. This basically extracts the existing x86 "find and expand faulting vma" code, but extends it to also take the mmap lock for writing in case we actually do need to expand the vma. We've historically short-circuited that case, and have some rather ugly special logic to serialize the stack segment expansion (since we only hold the mmap lock for reading) that doesn't match the normal VM locking. That slight violation of locking worked well, right up until it didn't: the maple tree code really does want proper locking even for simple extension of an existing vma. So extract the code for "look up the vma of the fault" from x86, fix it up to do the necessary write locking, and make it available as a helper function for other architectures that can use the common helper. Note: I say "common helper", but it really only handles the normal stack-grows-down case. Which is all architectures except for PA-RISC and IA64. So some rare architectures can't use the helper, but if they care they'll just need to open-code this logic. It's also worth pointing out that this code really would like to have an optimistic "mmap_upgrade_trylock()" to make it quicker to go from a read-lock (for the common case) to taking the write lock (for having to extend the vma) in the normal single-threaded situation where there is no other locking activity. But that _is_ all the very uncommon special case, so while it would be nice to have such an operation, it probably doesn't matter in reality. I did put in the skeleton code for such a possible future expansion, even if it only acts as pseudo-documentation for what we're doing. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--arch/x86/mm/fault.c52
1 files changed, 2 insertions, 50 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c..e8711b2cafaf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -880,12 +880,6 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}
-static noinline void
-bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
-{
- __bad_area(regs, error_code, address, 0, SEGV_MAPERR);
-}
-
static inline bool bad_area_access_from_pkeys(unsigned long error_code,
struct vm_area_struct *vma)
{
@@ -1366,51 +1360,10 @@ void do_user_addr_fault(struct pt_regs *regs,
lock_mmap:
#endif /* CONFIG_PER_VMA_LOCK */
- /*
- * Kernel-mode access to the user address space should only occur
- * on well-defined single instructions listed in the exception
- * tables. But, an erroneous kernel fault occurring outside one of
- * those areas which also holds mmap_lock might deadlock attempting
- * to validate the fault against the address space.
- *
- * Only do the expensive exception table search when we might be at
- * risk of a deadlock. This happens if we
- * 1. Failed to acquire mmap_lock, and
- * 2. The access did not originate in userspace.
- */
- if (unlikely(!mmap_read_trylock(mm))) {
- if (!user_mode(regs) && !search_exception_tables(regs->ip)) {
- /*
- * Fault from code in kernel from
- * which we do not expect faults.
- */
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
- return;
- }
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
- return;
- }
- if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
+ bad_area_nosemaphore(regs, error_code, address);
return;
}
@@ -1418,7 +1371,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address, vma);
return;