diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/gup_benchmark.c | 3 | ||||
-rw-r--r-- | mm/huge_memory.c | 8 | ||||
-rw-r--r-- | mm/hugetlb.c | 90 | ||||
-rw-r--r-- | mm/madvise.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 62 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 12 | ||||
-rw-r--r-- | mm/percpu.c | 1 | ||||
-rw-r--r-- | mm/rmap.c | 42 | ||||
-rw-r--r-- | mm/vmscan.c | 7 | ||||
-rw-r--r-- | mm/vmstat.c | 4 |
11 files changed, 134 insertions, 99 deletions
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index 6a473709e9b6..7405c9d89d65 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd, struct gup_benchmark *gup) { ktime_t start_time, end_time; - unsigned long i, nr, nr_pages, addr, next; + unsigned long i, nr_pages, addr, next; + int nr; struct page **pages; nr_pages = gup->size / PAGE_SIZE; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 533f9b00147d..58269f8ba7c4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2885,9 +2885,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, if (!(pvmw->pmd && !pvmw->pte)) return; - mmu_notifier_invalidate_range_start(mm, address, - address + HPAGE_PMD_SIZE); - flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); pmdval = *pvmw->pmd; pmdp_invalidate(vma, address, pvmw->pmd); @@ -2900,9 +2897,6 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, set_pmd_at(mm, address, pvmw->pmd, pmdswp); page_remove_rmap(page, true); put_page(page); - - mmu_notifier_invalidate_range_end(mm, address, - address + HPAGE_PMD_SIZE); } void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) @@ -2931,7 +2925,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) else page_add_file_rmap(new, true); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); - if (vma->vm_flags & VM_LOCKED) + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) mlock_vma_page(new); update_mmu_cache_pmd(vma, address, pvmw->pmd); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3c21775f196b..5c390f5a5207 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - const unsigned long mmun_start = start; /* For mmu_notifiers */ - const unsigned long mmun_end = end; /* For mmu_notifiers */ + unsigned long mmun_start = start; /* For mmu_notifiers */ + unsigned long mmun_end = end; /* For mmu_notifiers */ WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, */ tlb_remove_check_page_size_change(tlb, sz); tlb_start_vma(tlb, vma); + + /* + * If sharing possible, alert mmu notifiers of worst case. + */ + adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); address = start; for (; address < end; address += sz) { @@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { spin_unlock(ptl); + /* + * We just unmapped a page of PMDs by clearing a PUD. + * The caller's TLB flush range should cover this area. + */ continue; } @@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm; struct mmu_gather tlb; + unsigned long tlb_start = start; + unsigned long tlb_end = end; + + /* + * If shared PMDs were possibly used within this vma range, adjust + * start/end for worst case tlb flushing. + * Note that we can not be sure if PMDs are shared until we try to + * unmap pages. However, we want to make sure TLB flushing covers + * the largest possible range. + */ + adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end); mm = vma->vm_mm; - tlb_gather_mmu(&tlb, mm, start, end); + tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end); __unmap_hugepage_range(&tlb, vma, start, end, ref_page); - tlb_finish_mmu(&tlb, start, end); + tlb_finish_mmu(&tlb, tlb_start, tlb_end); } /* @@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte_t pte; struct hstate *h = hstate_vma(vma); unsigned long pages = 0; + unsigned long f_start = start; + unsigned long f_end = end; + bool shared_pmd = false; + + /* + * In the case of shared PMDs, the area to flush could be beyond + * start/end. Set f_start/f_end to cover the maximum possible + * range if PMD sharing is possible. + */ + adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end); BUG_ON(address >= end); - flush_cache_range(vma, address, end); + flush_cache_range(vma, f_start, f_end); - mmu_notifier_invalidate_range_start(mm, start, end); + mmu_notifier_invalidate_range_start(mm, f_start, f_end); i_mmap_lock_write(vma->vm_file->f_mapping); for (; address < end; address += huge_page_size(h)) { spinlock_t *ptl; @@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (huge_pmd_unshare(mm, &address, ptep)) { pages++; spin_unlock(ptl); + shared_pmd = true; continue; } pte = huge_ptep_get(ptep); @@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare * may have cleared our pud entry and done put_page on the page table: * once we release i_mmap_rwsem, another task can do the final put_page - * and that page table be reused and filled with junk. + * and that page table be reused and filled with junk. If we actually + * did unshare a page of pmds, flush the range corresponding to the pud. */ - flush_hugetlb_tlb_range(vma, start, end); + if (shared_pmd) + flush_hugetlb_tlb_range(vma, f_start, f_end); + else + flush_hugetlb_tlb_range(vma, start, end); /* * No need to call mmu_notifier_invalidate_range() we are downgrading * page table protection not changing it to point to a new page. @@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * See Documentation/vm/mmu_notifier.rst */ i_mmap_unlock_write(vma->vm_file->f_mapping); - mmu_notifier_invalidate_range_end(mm, start, end); + mmu_notifier_invalidate_range_end(mm, f_start, f_end); return pages << h->order; } @@ -4545,13 +4580,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) /* * check on proper vm_flags and page table alignment */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) + if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end)) return true; return false; } /* + * Determine if start,end range within vma could be mapped by shared pmd. + * If yes, adjust start and end to cover range associated with possible + * shared pmd mappings. + */ +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + unsigned long check_addr = *start; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + + for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { + unsigned long a_start = check_addr & PUD_MASK; + unsigned long a_end = a_start + PUD_SIZE; + + /* + * If sharing is possible, adjust start/end if necessary. + */ + if (range_in_vma(vma, a_start, a_end)) { + if (a_start < *start) + *start = a_start; + if (a_end > *end) + *end = a_end; + } + } +} + +/* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the * !shared pmd case because we can allocate the pmd later as well, it makes the @@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } + +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} #define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ diff --git a/mm/madvise.c b/mm/madvise.c index 972a9eaa898b..71d21df2a3f3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma, new_flags |= VM_DONTDUMP; break; case MADV_DODUMP: - if (new_flags & VM_SPECIAL) { + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { error = -EINVAL; goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index d6a2e89b086a..84381b55b2bd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); + if (PageTransHuge(page) && PageMlocked(page)) + clear_page_mlock(page); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, pvmw.address, pvmw.pte); } @@ -1411,7 +1414,7 @@ retry: * we encounter them after the rest of the list * is processed. */ - if (PageTransHuge(page)) { + if (PageTransHuge(page) && !PageHuge(page)) { lock_page(page); rc = split_huge_page_to_list(page, from); unlock_page(page); @@ -1855,46 +1858,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page, return newpage; } -/* - * page migration rate limiting control. - * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs - * window of time. Default here says do not migrate more than 1280M per second. - */ -static unsigned int migrate_interval_millisecs __read_mostly = 100; -static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); - -/* Returns true if the node is migrate rate-limited after the update */ -static bool numamigrate_update_ratelimit(pg_data_t *pgdat, - unsigned long nr_pages) -{ - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) { - spin_lock(&pgdat->numabalancing_migrate_lock); - pgdat->numabalancing_migrate_nr_pages = 0; - pgdat->numabalancing_migrate_next_window = jiffies + - msecs_to_jiffies(migrate_interval_millisecs); - spin_unlock(&pgdat->numabalancing_migrate_lock); - } - if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { - trace_mm_numa_migrate_ratelimit(current, pgdat->node_id, - nr_pages); - return true; - } - - /* - * This is an unlocked non-atomic update so errors are possible. - * The consequences are failing to migrate when we potentiall should - * have which is not severe enough to warrant locking. If it is ever - * a problem, it can be converted to a per-cpu counter. - */ - pgdat->numabalancing_migrate_nr_pages += nr_pages; - return false; -} - static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; @@ -1967,14 +1930,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, if (page_is_file_cache(page) && PageDirty(page)) goto out; - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (numamigrate_update_ratelimit(pgdat, 1)) - goto out; - isolated = numamigrate_isolate_page(pgdat, page); if (!isolated) goto out; @@ -2021,14 +1976,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unsigned long mmun_start = address & HPAGE_PMD_MASK; unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR)) - goto out_dropref; - new_page = alloc_pages_node(node, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), HPAGE_PMD_ORDER); @@ -2125,7 +2072,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); -out_dropref: ptl = pmd_lock(mm, pmd); if (pmd_same(*pmd, entry)) { entry = pmd_modify(entry, vma->vm_page_prot); diff --git a/mm/mmap.c b/mm/mmap.c index 5f2b2b184c60..f7cd9cb966c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1410,7 +1410,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, if (flags & MAP_FIXED_NOREPLACE) { struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && vma->vm_start <= addr) + if (vma && vma->vm_start < addr + len) return -EEXIST; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 89d2a2ab3fe6..e2ef1c17942f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6193,17 +6193,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages, return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT; } -#ifdef CONFIG_NUMA_BALANCING -static void pgdat_init_numabalancing(struct pglist_data *pgdat) -{ - spin_lock_init(&pgdat->numabalancing_migrate_lock); - pgdat->numabalancing_migrate_nr_pages = 0; - pgdat->numabalancing_migrate_next_window = jiffies; -} -#else -static void pgdat_init_numabalancing(struct pglist_data *pgdat) {} -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void pgdat_init_split_queue(struct pglist_data *pgdat) { @@ -6228,7 +6217,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) { pgdat_resize_init(pgdat); - pgdat_init_numabalancing(pgdat); pgdat_init_split_queue(pgdat); pgdat_init_kcompactd(pgdat); diff --git a/mm/percpu.c b/mm/percpu.c index a749d4d96e3e..4b90682623e9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1212,6 +1212,7 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; + pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); pcpu_mem_free(chunk); diff --git a/mm/rmap.c b/mm/rmap.c index eb477809a5c0..1e79fac3186b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* - * We have to assume the worse case ie pmd for invalidation. Note that - * the page can not be free in this function as call of try_to_unmap() - * must hold a reference on the page. + * For THP, we have to assume the worse case ie pmd for invalidation. + * For hugetlb, it could be much worse if we need to do pud + * invalidation in the case of pmd sharing. + * + * Note that the page can not be free in this function as call of + * try_to_unmap() must hold a reference on the page. */ end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); + if (PageHuge(page)) { + /* + * If sharing is possible, start and end will be adjusted + * accordingly. + */ + adjust_range_if_pmd_sharing_possible(vma, &start, &end); + } mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); while (page_vma_mapped_walk(&pvmw)) { @@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; + if (PageHuge(page)) { + if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + /* + * huge_pmd_unshare unmapped an entire PMD + * page. There is no way of knowing exactly + * which PMDs may be cached for this mm, so + * we must flush them all. start/end were + * already adjusted above to cover this range. + */ + flush_cache_range(vma, start, end); + flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); + + /* + * The ref count of the PMD page was dropped + * which is part of the way map counting + * is done for shared PMDs. Return 'true' + * here. When there is no other sharing, + * huge_pmd_unshare returns false and we will + * unmap the actual page and drop map count + * to zero. + */ + page_vma_mapped_walk_done(&pvmw); + break; + } + } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && diff --git a/mm/vmscan.c b/mm/vmscan.c index c7ce2c161225..c5ef7240cbcb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -580,8 +580,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { struct memcg_shrinker_map *map; - unsigned long freed = 0; - int ret, i; + unsigned long ret, freed = 0; + int i; if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)) return 0; @@ -677,9 +677,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority) { + unsigned long ret, freed = 0; struct shrinker *shrinker; - unsigned long freed = 0; - int ret; if (!mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); diff --git a/mm/vmstat.c b/mm/vmstat.c index 8ba0870ecddd..7878da76abf2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1275,6 +1275,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", +#else + "", /* nr_tlb_remote_flush */ + "", /* nr_tlb_remote_flush_received */ #endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", @@ -1283,7 +1286,6 @@ const char * const vmstat_text[] = { #ifdef CONFIG_DEBUG_VM_VMACACHE "vmacache_find_calls", "vmacache_find_hits", - "vmacache_full_flushes", #endif #ifdef CONFIG_SWAP "swap_ra", |