Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--  arch/x86/mm/fault.c  129
1 file changed, 52 insertions(+), 77 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..296d294142c8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -7,7 +7,6 @@
#include <linux/sched.h> /* test_thread_flag(), ... */
#include <linux/sched/task_stack.h> /* task_stack_*(), ... */
#include <linux/kdebug.h> /* oops_begin/end, ... */
-#include <linux/extable.h> /* search_exception_tables */
#include <linux/memblock.h> /* max_low_pfn */
#include <linux/kfence.h> /* kfence_handle_page_fault */
#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */
@@ -20,6 +19,7 @@
#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/
#include <linux/mm_types.h>
#include <linux/mm.h> /* find_and_lock_vma() */
+#include <linux/vmalloc.h>
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/traps.h> /* dotraplinkage, ... */
@@ -34,6 +34,8 @@
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
#include <asm/irq_stack.h>
+#include <asm/fred.h>
+#include <asm/sev.h> /* snp_dump_hva_rmpentry() */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -250,7 +252,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
- if (pmd_large(*pmd_k))
+ if (pmd_leaf(*pmd_k))
return 0;
pte_k = pte_offset_kernel(pmd_k, address);
@@ -319,7 +321,7 @@ static void dump_pagetable(unsigned long address)
* And let's rather not kmap-atomic the pte, just in case
* it's allocated already:
*/
- if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_leaf(*pmd))
goto out;
pte = pte_offset_kernel(pmd, address);
@@ -368,7 +370,7 @@ static void dump_pagetable(unsigned long address)
goto bad;
pr_cont("P4D %lx ", p4d_val(*p4d));
- if (!p4d_present(*p4d) || p4d_large(*p4d))
+ if (!p4d_present(*p4d) || p4d_leaf(*p4d))
goto out;
pud = pud_offset(p4d, address);
@@ -376,7 +378,7 @@ static void dump_pagetable(unsigned long address)
goto bad;
pr_cont("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud) || pud_large(*pud))
+ if (!pud_present(*pud) || pud_leaf(*pud))
goto out;
pmd = pmd_offset(pud, address);
@@ -384,7 +386,7 @@ static void dump_pagetable(unsigned long address)
goto bad;
pr_cont("PMD %lx ", pmd_val(*pmd));
- if (!pmd_present(*pmd) || pmd_large(*pmd))
+ if (!pmd_present(*pmd) || pmd_leaf(*pmd))
goto out;
pte = pte_offset_kernel(pmd, address);
@@ -512,18 +514,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
if (error_code & X86_PF_INSTR) {
unsigned int level;
+ bool nx, rw;
pgd_t *pgd;
pte_t *pte;
pgd = __va(read_cr3_pa());
pgd += pgd_index(address);
- pte = lookup_address_in_pgd(pgd, address, &level);
+ pte = lookup_address_in_pgd_attr(pgd, address, &level, &nx, &rw);
- if (pte && pte_present(*pte) && !pte_exec(*pte))
+ if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx))
pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
from_kuid(&init_user_ns, current_uid()));
- if (pte && pte_present(*pte) && pte_exec(*pte) &&
+ if (pte && pte_present(*pte) && pte_exec(*pte) && !nx &&
(pgd_flags(*pgd) & _PAGE_USER) &&
(__read_cr4() & X86_CR4_SMEP))
pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",
@@ -547,6 +550,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
!(error_code & X86_PF_PROT) ? "not-present page" :
(error_code & X86_PF_RSVD) ? "reserved bit violation" :
(error_code & X86_PF_PK) ? "protection keys violation" :
+ (error_code & X86_PF_RMP) ? "RMP violation" :
"permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
@@ -579,6 +583,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
}
dump_pagetable(address);
+
+ if (error_code & X86_PF_RMP)
+ snp_dump_hva_rmpentry(address);
}
static noinline void
@@ -670,7 +677,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
ASM_CALL_ARG3,
, [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info));
- unreachable();
+ BUG();
}
#endif
@@ -717,39 +724,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
WARN_ON_ONCE(user_mode(regs));
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
- /*
- * Any interrupt that takes a fault gets the fixup. This makes
- * the below recursive fault logic only apply to a faults from
- * task context.
- */
- if (in_interrupt())
- return;
-
- /*
- * Per the above we're !in_interrupt(), aka. task context.
- *
- * In this case we need to make sure we're not recursively
- * faulting through the emulate_vsyscall() logic.
- */
- if (current->thread.sig_on_uaccess_err && signal) {
- sanitize_error_code(address, &error_code);
-
- set_signal_archinfo(address, error_code);
-
- if (si_code == SEGV_PKUERR) {
- force_sig_pkuerr((void __user *)address, pkey);
- } else {
- /* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address);
- }
- }
-
- /*
- * Barring that, we can do the fixup and be happy.
- */
+ if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
return;
- }
/*
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH
@@ -798,15 +774,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl);
}
-/*
- * The (legacy) vsyscall page is the long page in the kernel portion
- * of the address space that has user-accessible permissions.
- */
-static bool is_vsyscall_vaddr(unsigned long vaddr)
-{
- return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
-}
-
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)
@@ -868,14 +835,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, u32 pkey, int si_code)
+ unsigned long address, struct mm_struct *mm,
+ struct vm_area_struct *vma, u32 pkey, int si_code)
{
- struct mm_struct *mm = current->mm;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
- mmap_read_unlock(mm);
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}
@@ -899,7 +869,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, struct mm_struct *mm,
+ struct vm_area_struct *vma)
{
/*
* This OSPKE check is not strictly necessary at runtime.
@@ -929,9 +900,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
*/
u32 pkey = vma_pkey(vma);
- __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
+ __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
} else {
- __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
+ __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
}
}
@@ -1039,21 +1010,21 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
if (!p4d_present(*p4d))
return 0;
- if (p4d_large(*p4d))
+ if (p4d_leaf(*p4d))
return spurious_kernel_fault_check(error_code, (pte_t *) p4d);
pud = pud_offset(p4d, address);
if (!pud_present(*pud))
return 0;
- if (pud_large(*pud))
+ if (pud_leaf(*pud))
return spurious_kernel_fault_check(error_code, (pte_t *) pud);
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return 0;
- if (pmd_large(*pmd))
+ if (pmd_leaf(*pmd))
return spurious_kernel_fault_check(error_code, (pte_t *) pmd);
pte = pte_offset_kernel(pmd, address);
@@ -1302,21 +1273,14 @@ void do_user_addr_fault(struct pt_regs *regs,
return;
}
- /*
- * It's safe to allow irq's after cr2 has been saved and the
- * vmalloc fault has been handled.
- *
- * User-mode registers count as a user access even for any
- * potential system fault or CPU buglet:
- */
- if (user_mode(regs)) {
- local_irq_enable();
- flags |= FAULT_FLAG_USER;
- } else {
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
+ /* Legacy check - remove this after verifying that it doesn't trigger */
+ if (WARN_ON_ONCE(!(regs->flags & X86_EFLAGS_IF))) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
}
+ local_irq_enable();
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
@@ -1332,6 +1296,14 @@ void do_user_addr_fault(struct pt_regs *regs,
if (error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;
+ /*
+ * We set FAULT_FLAG_USER based on the register state, not
+ * based on X86_PF_USER. User space accesses that cause
+ * system page faults are still user accesses.
+ */
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
#ifdef CONFIG_X86_64
/*
* Faults in the vsyscall page might need emulation. The
@@ -1358,8 +1330,9 @@ void do_user_addr_fault(struct pt_regs *regs,
goto lock_mmap;
if (unlikely(access_error(error_code, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
+ bad_area_access_error(regs, error_code, address, NULL, vma);
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return;
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -1395,7 +1368,7 @@ retry:
* we can handle it..
*/
if (unlikely(access_error(error_code, vma))) {
- bad_area_access_error(regs, error_code, address, vma);
+ bad_area_access_error(regs, error_code, address, mm, vma);
return;
}
@@ -1518,8 +1491,10 @@ handle_page_fault(struct pt_regs *regs, unsigned long error_code,
DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
{
- unsigned long address = read_cr2();
irqentry_state_t state;
+ unsigned long address;
+
+ address = cpu_feature_enabled(X86_FEATURE_FRED) ? fred_event_data(regs) : read_cr2();
prefetchw(&current->mm->mmap_lock);
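
A note on the final hunk: with FRED, the CPU delivers the faulting linear address as event data in the entry stack frame, so the handler no longer has to read the CR2 register on such systems. A minimal sketch of the selection logic, using the same interfaces the patch itself uses (cpu_feature_enabled(), fred_event_data(), read_cr2()); the helper name fault_address() is hypothetical, not part of the patch:

/*
 * Sketch only -- mirrors the logic of the last hunk.  On FRED
 * systems the faulting address arrives as event data in the FRED
 * stack frame and is taken from pt_regs; with legacy IDT delivery
 * it is read back from the CR2 register.
 */
static __always_inline unsigned long fault_address(struct pt_regs *regs)
{
	if (cpu_feature_enabled(X86_FEATURE_FRED))
		return fred_event_data(regs);	/* captured at event delivery */

	return read_cr2();	/* classic path: CR2 holds the address */
}

Taking the address from the stack frame means it is captured together with the rest of the exception state at delivery, rather than read back later from a register that a nested page fault could overwrite.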