diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 155 |
1 files changed, 84 insertions, 71 deletions
diff --git a/mm/memory.c b/mm/memory.c index af84bc0ec17c..d176154c243f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -373,30 +373,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ /* - * If a p?d_bad entry is found while walking page tables, report - * the error, before resetting entry to p?d_none. Usually (but - * very seldom) called out from the p?d_none_or_clear_bad macros. - */ - -void pgd_clear_bad(pgd_t *pgd) -{ - pgd_ERROR(*pgd); - pgd_clear(pgd); -} - -void pud_clear_bad(pud_t *pud) -{ - pud_ERROR(*pud); - pud_clear(pud); -} - -void pmd_clear_bad(pmd_t *pmd) -{ - pmd_ERROR(*pmd); - pmd_clear(pmd); -} - -/* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. */ @@ -861,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, */ make_migration_entry_read(&entry); pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); set_pte_at(src_mm, addr, src_pte, pte); } } @@ -1505,7 +1483,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma, if (pud_none(*pud)) goto no_page_table; if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { - BUG_ON(flags & FOLL_GET); + if (flags & FOLL_GET) + goto out; page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); goto out; } @@ -1516,8 +1495,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma, if (pmd_none(*pmd)) goto no_page_table; if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { - BUG_ON(flags & FOLL_GET); page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); + if (flags & FOLL_GET) { + /* + * Refcount on tail pages are not well-defined and + * shouldn't be taken. The caller should handle a NULL + * return when trying to follow tail pages. + */ + if (PageHead(page)) + get_page(page); + else { + page = NULL; + goto out; + } + } goto out; } if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) @@ -3530,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, } int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, - unsigned long addr, int current_nid) + unsigned long addr, int page_nid) { get_page(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == numa_node_id()) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); return mpol_misplaced(page, vma, addr); @@ -3546,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page = NULL; spinlock_t *ptl; - int current_nid = -1; + int page_nid = -1; int target_nid; bool migrated = false; @@ -3576,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } - current_nid = page_to_nid(page); - target_nid = numa_migrate_prep(page, vma, addr, current_nid); + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { - /* - * Account for the fault against the current node if it not - * being replaced regardless of where the page is located. - */ - current_nid = numa_node_id(); put_page(page); goto out; } @@ -3592,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Migrate to the requested node */ migrated = migrate_misplaced_page(page, target_nid); if (migrated) - current_nid = target_nid; + page_nid = target_nid; out: - if (current_nid != -1) - task_numa_fault(current_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); return 0; } @@ -3611,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long offset; spinlock_t *ptl; bool numa = false; - int local_nid = numa_node_id(); spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3634,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { pte_t pteval = *pte; struct page *page; - int curr_nid = local_nid; + int page_nid = -1; int target_nid; - bool migrated; + bool migrated = false; + if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3658,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(page_mapcount(page) != 1)) continue; - /* - * Note that the NUMA fault is later accounted to either - * the node that is currently running or where the page is - * migrated to. - */ - curr_nid = local_nid; - target_nid = numa_migrate_prep(page, vma, addr, - page_to_nid(page)); - if (target_nid == -1) { + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); + pte_unmap_unlock(pte, ptl); + if (target_nid != -1) { + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) + page_nid = target_nid; + } else { put_page(page); - continue; } - /* Migrate to the requested node */ - pte_unmap_unlock(pte, ptl); - migrated = migrate_misplaced_page(page, target_nid); - if (migrated) - curr_nid = target_nid; - task_numa_fault(curr_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } @@ -3706,7 +3686,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * but allow concurrent faults), and pte mapped but not yet locked. * We return with mmap_sem still held, but pte unmapped and unlocked. */ -int handle_pte_fault(struct mm_struct *mm, +static int handle_pte_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *pte, pmd_t *pmd, unsigned int flags) { @@ -3765,22 +3745,14 @@ unlock: /* * By the time we get here, we already hold the mm semaphore */ -int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, unsigned int flags) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - __set_current_state(TASK_RUNNING); - - count_vm_event(PGFAULT); - mem_cgroup_count_vm_event(mm, PGFAULT); - - /* do counter updates before entering really critical section. */ - check_sync_rss_stat(current); - if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); @@ -3793,9 +3765,12 @@ retry: if (!pmd) return VM_FAULT_OOM; if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) { + int ret = VM_FAULT_FALLBACK; if (!vma->vm_ops) - return do_huge_pmd_anonymous_page(mm, vma, address, - pmd, flags); + ret = do_huge_pmd_anonymous_page(mm, vma, address, + pmd, flags); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; } else { pmd_t orig_pmd = *pmd; int ret; @@ -3861,6 +3836,43 @@ retry: return handle_pte_fault(mm, vma, address, pte, pmd, flags); } +int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, unsigned int flags) +{ + int ret; + + __set_current_state(TASK_RUNNING); + + count_vm_event(PGFAULT); + mem_cgroup_count_vm_event(mm, PGFAULT); + + /* do counter updates before entering really critical section. */ + check_sync_rss_stat(current); + + /* + * Enable the memcg OOM handling for faults triggered in user + * space. Kernel faults are handled more gracefully. + */ + if (flags & FAULT_FLAG_USER) + mem_cgroup_oom_enable(); + + ret = __handle_mm_fault(mm, vma, address, flags); + + if (flags & FAULT_FLAG_USER) { + mem_cgroup_oom_disable(); + /* + * The task may have entered a memcg OOM situation but + * if the allocation error was handled gracefully (no + * VM_FAULT_OOM), there is no need to kill anything. + * Just clean up the OOM state peacefully. + */ + if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) + mem_cgroup_oom_synchronize(false); + } + + return ret; +} + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. @@ -4079,6 +4091,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, return len; } +EXPORT_SYMBOL_GPL(generic_access_phys); #endif /* |