Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--	arch/x86/mm/fault.c	78
1 files changed, 78 insertions, 0 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..6e3e8a124903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	return pmd_k;
 }
 
+/*
+ *   Handle a fault on the vmalloc or module mapping area
+ *
+ *   This is needed because there is a race condition between the time
+ *   when the vmalloc mapping code updates the PMD to the point in time
+ *   where it synchronizes this update with the other page-tables in the
+ *   system.
+ *
+ *   In this race window another thread/CPU can map an area on the same
+ *   PMD, finds it already present and does not synchronize it with the
+ *   rest of the system yet. As a result v[mz]alloc might return areas
+ *   which are not mapped in every page-table in the system, causing an
+ *   unhandled page-fault when they are accessed.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+	unsigned long pgd_paddr;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+
+	/* Make sure we are in vmalloc area: */
+	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+		return -1;
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_paddr = read_cr3_pa();
+	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+	if (!pmd_k)
+		return -1;
+
+	if (pmd_large(*pmd_k))
+		return 0;
+
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+
+	return 0;
+}
+NOKPROBE_SYMBOL(vmalloc_fault);
+
 void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
@@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	 */
 	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
 
+#ifdef CONFIG_X86_32
+	/*
+	 * We can fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * Before doing this on-demand faulting, ensure that the
+	 * fault is not any of the following:
+	 * 1. A fault on a PTE with a reserved bit set.
+	 * 2. A fault caused by a user-mode access.  (Do not demand-
+	 *    fault kernel memory due to user-mode accesses).
+	 * 3. A fault caused by a page-level protection violation.
+	 *    (A demand fault would be on a non-present page which
+	 *     would have X86_PF_PROT==0).
+	 *
+	 * This is only needed to close a race condition on x86-32 in
+	 * the vmalloc mapping/unmapping code. See the comment above
+	 * vmalloc_fault() for details. On x86-64 the race does not
+	 * exist as the vmalloc mappings don't need to be synchronized
+	 * there.
+	 */
+	if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+		if (vmalloc_fault(address) >= 0)
+			return;
+	}
+#endif
+
 	/* Was the fault spurious, caused by lazy TLB invalidation? */
 	if (spurious_kernel_fault(hw_error_code, address))
 		return;
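For readers following the new comment above vmalloc_fault(): new vmalloc mappings land only in a reference page table (init_mm.pgd), other page tables pick the entry up lazily, and a fault on a not-yet-synchronized address is repaired by copying the missing entry over. Below is a minimal user-space model of that repair step, assuming a single top-level table per task; the names reference_pgd, sync_one() and model_vmalloc_fault() are illustrative inventions, not kernel API.

/*
 * User-space model of lazy page-table synchronization.
 * Each "task" has a private top-level table; mappings are installed
 * only in the reference table and copied over on the first fault.
 */
#include <stdio.h>
#include <stdlib.h>

#define TOP_ENTRIES 8				/* model of the per-task PGD */

static long *reference_pgd[TOP_ENTRIES];	/* init_mm.pgd analogue  */
static long *task_pgd[TOP_ENTRIES];		/* a task's private copy */

/* Analogue of vmalloc_sync_one(): copy one top-level entry over. */
static long *sync_one(long **pgd, int idx)
{
	if (!reference_pgd[idx])
		return NULL;		/* not mapped in the reference table */
	pgd[idx] = reference_pgd[idx];
	return pgd[idx];
}

/* Analogue of vmalloc_fault(): resolve a miss from the reference table. */
static int model_vmalloc_fault(int idx)
{
	if (idx < 0 || idx >= TOP_ENTRIES)
		return -1;		/* outside the "vmalloc area" */
	if (!sync_one(task_pgd, idx))
		return -1;		/* genuinely unmapped: a real fault */
	return 0;			/* entry copied, retry the access */
}

int main(void)
{
	/* "vmalloc" installs a mapping only in the reference table. */
	reference_pgd[3] = calloc(1, sizeof(long));

	/* The task's table misses it until the fault handler syncs. */
	printf("before fault: %p\n", (void *)task_pgd[3]);
	if (model_vmalloc_fault(3) == 0)
		printf("after fault:  %p\n", (void *)task_pgd[3]);

	free(reference_pgd[3]);
	return 0;
}

The real handler additionally walks down to the PTE level and treats a large PMD as a terminal mapping, which the model omits.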

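The do_kern_addr_fault() hunk attempts this repair only when none of X86_PF_RSVD, X86_PF_USER and X86_PF_PROT are set, i.e. for a kernel-mode access to a not-present page with sane page-table bits. A stand-alone sketch of that predicate follows; the X86_PF_* bit values match the kernel's definitions, while may_demand_fault() is a hypothetical helper used here for illustration.

#include <stdbool.h>
#include <stdio.h>

#define X86_PF_PROT	(1 << 0)	/* protection violation, page present */
#define X86_PF_USER	(1 << 2)	/* fault came from user mode          */
#define X86_PF_RSVD	(1 << 3)	/* reserved bit set in a PTE          */

/*
 * Only a kernel-mode access to a not-present page with no
 * reserved-bit anomaly may be handled by vmalloc_fault().
 */
static bool may_demand_fault(unsigned long hw_error_code)
{
	return !(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT));
}

int main(void)
{
	printf("%d\n", may_demand_fault(0));		/* 1: eligible        */
	printf("%d\n", may_demand_fault(X86_PF_USER));	/* 0: user access     */
	printf("%d\n", may_demand_fault(X86_PF_PROT));	/* 0: page is present */
	return 0;
}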