diff options
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r-- | arch/x86/mm/fault.c | 129 |
1 files changed, 52 insertions, 77 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 679b09cfe241..296d294142c8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -7,7 +7,6 @@ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ @@ -20,6 +19,7 @@ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> #include <linux/mm.h> /* find_and_lock_vma() */ +#include <linux/vmalloc.h> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ @@ -34,6 +34,8 @@ #include <asm/kvm_para.h> /* kvm_handle_async_pf */ #include <asm/vdso.h> /* fixup_vdso_exception() */ #include <asm/irq_stack.h> +#include <asm/fred.h> +#include <asm/sev.h> /* snp_dump_hva_rmpentry() */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -250,7 +252,7 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; - if (pmd_large(*pmd_k)) + if (pmd_leaf(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); @@ -319,7 +321,7 @@ static void dump_pagetable(unsigned long address) * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ - if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) + if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); @@ -368,7 +370,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("P4D %lx ", p4d_val(*p4d)); - if (!p4d_present(*p4d) || p4d_large(*p4d)) + if (!p4d_present(*p4d) || p4d_leaf(*p4d)) goto out; pud = pud_offset(p4d, address); @@ -376,7 +378,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("PUD %lx ", pud_val(*pud)); - if (!pud_present(*pud) || pud_large(*pud)) + if (!pud_present(*pud) || pud_leaf(*pud)) goto out; pmd = pmd_offset(pud, address); @@ -384,7 +386,7 @@ static void dump_pagetable(unsigned long address) goto bad; pr_cont("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd) || pmd_large(*pmd)) + if (!pmd_present(*pmd) || pmd_leaf(*pmd)) goto out; pte = pte_offset_kernel(pmd, address); @@ -512,18 +514,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad if (error_code & X86_PF_INSTR) { unsigned int level; + bool nx, rw; pgd_t *pgd; pte_t *pte; pgd = __va(read_cr3_pa()); pgd += pgd_index(address); - pte = lookup_address_in_pgd(pgd, address, &level); + pte = lookup_address_in_pgd_attr(pgd, address, &level, &nx, &rw); - if (pte && pte_present(*pte) && !pte_exec(*pte)) + if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx)) pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", from_kuid(&init_user_ns, current_uid())); - if (pte && pte_present(*pte) && pte_exec(*pte) && + if (pte && pte_present(*pte) && pte_exec(*pte) && !nx && (pgd_flags(*pgd) & _PAGE_USER) && (__read_cr4() & X86_CR4_SMEP)) pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", @@ -547,6 +550,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad !(error_code & X86_PF_PROT) ? "not-present page" : (error_code & X86_PF_RSVD) ? "reserved bit violation" : (error_code & X86_PF_PK) ? "protection keys violation" : + (error_code & X86_PF_RMP) ? "RMP violation" : "permissions violation"); if (!(error_code & X86_PF_USER) && user_mode(regs)) { @@ -579,6 +583,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad } dump_pagetable(address); + + if (error_code & X86_PF_RMP) + snp_dump_hva_rmpentry(address); } static noinline void @@ -670,7 +677,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, ASM_CALL_ARG3, , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info)); - unreachable(); + BUG(); } #endif @@ -717,39 +724,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - if (si_code == SEGV_PKUERR) { - force_sig_pkuerr((void __user *)address, pkey); - } else { - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - } - - /* - * Barring that, we can do the fixup and be happy. - */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; - } /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH @@ -798,15 +774,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) @@ -868,14 +835,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static void __bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 pkey, int si_code) + unsigned long address, struct mm_struct *mm, + struct vm_area_struct *vma, u32 pkey, int si_code) { - struct mm_struct *mm = current->mm; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ - mmap_read_unlock(mm); + if (mm) + mmap_read_unlock(mm); + else + vma_end_read(vma); __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } @@ -899,7 +869,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma) + unsigned long address, struct mm_struct *mm, + struct vm_area_struct *vma) { /* * This OSPKE check is not strictly necessary at runtime. @@ -929,9 +900,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, */ u32 pkey = vma_pkey(vma); - __bad_area(regs, error_code, address, pkey, SEGV_PKUERR); + __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR); } else { - __bad_area(regs, error_code, address, 0, SEGV_ACCERR); + __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR); } } @@ -1039,21 +1010,21 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address) if (!p4d_present(*p4d)) return 0; - if (p4d_large(*p4d)) + if (p4d_leaf(*p4d)) return spurious_kernel_fault_check(error_code, (pte_t *) p4d); pud = pud_offset(p4d, address); if (!pud_present(*pud)) return 0; - if (pud_large(*pud)) + if (pud_leaf(*pud)) return spurious_kernel_fault_check(error_code, (pte_t *) pud); pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return 0; - if (pmd_large(*pmd)) + if (pmd_leaf(*pmd)) return spurious_kernel_fault_check(error_code, (pte_t *) pmd); pte = pte_offset_kernel(pmd, address); @@ -1302,21 +1273,14 @@ void do_user_addr_fault(struct pt_regs *regs, return; } - /* - * It's safe to allow irq's after cr2 has been saved and the - * vmalloc fault has been handled. - * - * User-mode registers count as a user access even for any - * potential system fault or CPU buglet: - */ - if (user_mode(regs)) { - local_irq_enable(); - flags |= FAULT_FLAG_USER; - } else { - if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); + /* Legacy check - remove this after verifying that it doesn't trigger */ + if (WARN_ON_ONCE(!(regs->flags & X86_EFLAGS_IF))) { + bad_area_nosemaphore(regs, error_code, address); + return; } + local_irq_enable(); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* @@ -1332,6 +1296,14 @@ void do_user_addr_fault(struct pt_regs *regs, if (error_code & X86_PF_INSTR) flags |= FAULT_FLAG_INSTRUCTION; + /* + * We set FAULT_FLAG_USER based on the register state, not + * based on X86_PF_USER. User space accesses that cause + * system page faults are still user accesses. + */ + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + #ifdef CONFIG_X86_64 /* * Faults in the vsyscall page might need emulation. The @@ -1358,8 +1330,9 @@ void do_user_addr_fault(struct pt_regs *regs, goto lock_mmap; if (unlikely(access_error(error_code, vma))) { - vma_end_read(vma); - goto lock_mmap; + bad_area_access_error(regs, error_code, address, NULL, vma); + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + return; } fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) @@ -1395,7 +1368,7 @@ retry: * we can handle it.. */ if (unlikely(access_error(error_code, vma))) { - bad_area_access_error(regs, error_code, address, vma); + bad_area_access_error(regs, error_code, address, mm, vma); return; } @@ -1518,8 +1491,10 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code, DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) { - unsigned long address = read_cr2(); irqentry_state_t state; + unsigned long address; + + address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2(); prefetchw(¤t->mm->mmap_lock); |