From b063e374e7ae75589a36f4bcbfb47956ac197c57 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap
Date: Fri, 5 Nov 2021 13:38:18 -0700
Subject: mm/memory.c: avoid unnecessary kernel/user pointer conversion

Annotating a pointer from __user to kernel and then back again might
confuse sparse.  In copy_huge_page_from_user() it can be avoided by
removing the intermediate variable since it is never used.

Link: https://lkml.kernel.org/r/20210914150820.19326-1-amit.kachhap@arm.com
Signed-off-by: Amit Daniel Kachhap
Acked-by: Kirill A. Shutemov
Cc: Vincenzo Frascino
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index c52be6d6b605..1b7032a30dd5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5421,7 +5421,6 @@ long copy_huge_page_from_user(struct page *dst_page,
 				unsigned int pages_per_huge_page,
 				bool allow_pagefault)
 {
-	void *src = (void *)usr_src;
 	void *page_kaddr;
 	unsigned long i, rc = 0;
 	unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
@@ -5434,8 +5433,7 @@ long copy_huge_page_from_user(struct page *dst_page,
 		else
 			page_kaddr = kmap_atomic(subpage);
 		rc = copy_from_user(page_kaddr,
-				(const void __user *)(src + i * PAGE_SIZE),
-				PAGE_SIZE);
+				usr_src + i * PAGE_SIZE, PAGE_SIZE);
 		if (allow_pagefault)
 			kunmap(subpage);
 		else
--
cgit
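A rough userspace sketch of the pattern the patch adopts: keep the arithmetic
on the __user-annotated pointer and hand it straight to copy_from_user(),
instead of laundering it through an unannotated void * and casting back.
Everything below (the empty __user macro, the stub copy_from_user() and
copy_pages()) is illustrative stand-in code, not the kernel implementation.

#include <stdio.h>
#include <string.h>

#define __user			/* sparse address-space marker; a no-op here */
#define PAGE_SIZE 4096UL

/* Stub standing in for the kernel's copy_from_user(); always "succeeds". */
static unsigned long copy_from_user(void *dst, const void __user *src,
				    unsigned long n)
{
	memcpy(dst, src, n);
	return 0;
}

/* Offsets are applied to the annotated pointer itself; no cast round-trip. */
static long copy_pages(void *dst, const char __user *usr_src,
		       unsigned int nr_pages)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++)
		if (copy_from_user((char *)dst + i * PAGE_SIZE,
				   usr_src + i * PAGE_SIZE, PAGE_SIZE))
			return -1;
	return 0;
}

int main(void)
{
	static char src[2 * PAGE_SIZE], dst[2 * PAGE_SIZE];

	src[PAGE_SIZE] = 'x';
	copy_pages(dst, src, 2);
	printf("second page starts with '%c'\n", dst[PAGE_SIZE]);
	return 0;
}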
From 2ca99358671ad3a2065389476701f69f39ab5e5f Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 5 Nov 2021 13:38:28 -0700
Subject: mm: clear vmf->pte after pte_unmap_same() returns

pte_unmap_same() will always unmap the pte pointer.  After the unmap,
vmf->pte will not be valid any more, we should clear it.

It was safe only because no one is accessing vmf->pte after
pte_unmap_same() returns, since the only caller of pte_unmap_same() (so
far) is do_swap_page(), where vmf->pte will in most cases be overwritten
very soon.

Directly pass in vmf into pte_unmap_same() and then we can also avoid
the long parameter list too, which should be a nice cleanup.

Link: https://lkml.kernel.org/r/20210915181533.11188-1-peterx@redhat.com
Signed-off-by: Peter Xu
Reviewed-by: Miaohe Lin
Reviewed-by: David Hildenbrand
Reviewed-by: Liam Howlett
Acked-by: Hugh Dickins
Cc: Alistair Popple
Cc: Andrea Arcangeli
Cc: Axel Rasmussen
Cc: Jerome Glisse
Cc: "Kirill A . Shutemov"
Cc: Matthew Wilcox
Cc: Mike Rapoport
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 1b7032a30dd5..cf7b059b57a7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2724,19 +2724,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
  * proceeding (but do_wp_page is only called after already making such a check;
  * and do_anonymous_page can safely check later on).
  */
-static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
-				pte_t *page_table, pte_t orig_pte)
+static inline int pte_unmap_same(struct vm_fault *vmf)
 {
 	int same = 1;
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
 	if (sizeof(pte_t) > sizeof(unsigned long)) {
-		spinlock_t *ptl = pte_lockptr(mm, pmd);
+		spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
 		spin_lock(ptl);
-		same = pte_same(*page_table, orig_pte);
+		same = pte_same(*vmf->pte, vmf->orig_pte);
 		spin_unlock(ptl);
 	}
 #endif
-	pte_unmap(page_table);
+	pte_unmap(vmf->pte);
+	vmf->pte = NULL;
 	return same;
 }
 
@@ -3488,7 +3488,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	vm_fault_t ret = 0;
 	void *shadow = NULL;
 
-	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
+	if (!pte_unmap_same(vmf))
 		goto out;
 
 	entry = pte_to_swp_entry(vmf->orig_pte);
--
cgit
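The fix is an instance of a general defensive pattern: when a helper releases
a resource that is cached in a struct field, it should also clear the field so
later code cannot dereference a stale pointer.  A minimal userspace sketch of
that pattern follows; struct fault_ctx and ctx_unmap() are invented names
standing in loosely for struct vm_fault and pte_unmap_same(), not kernel
definitions.

#include <stdio.h>
#include <stdlib.h>

struct fault_ctx {
	int *pte;		/* stand-in for the mapped pte pointer */
};

/* Release the mapping and clear the cached pointer in one place. */
static void ctx_unmap(struct fault_ctx *ctx)
{
	free(ctx->pte);
	ctx->pte = NULL;	/* the point of the patch: no dangling field */
}

int main(void)
{
	struct fault_ctx ctx = { .pte = malloc(sizeof(*ctx.pte)) };

	ctx_unmap(&ctx);
	if (!ctx.pte)
		printf("pte cleared after unmap; later NULL checks stay honest\n");
	return 0;
}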
From 232a6a1c0619d1b4d9cd8d21949b2f13821be0af Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 5 Nov 2021 13:38:31 -0700
Subject: mm: drop first_index/last_index in zap_details

The first_index/last_index parameters in zap_details are actually only
used in unmap_mapping_range_tree().  At the meantime, this function is
only called by unmap_mapping_pages() once.  Instead of passing these two
variables through the whole stack of page zapping code, remove them from
zap_details and let them simply be parameters of
unmap_mapping_range_tree(), which is inlined.

Link: https://lkml.kernel.org/r/20210915181535.11238-1-peterx@redhat.com
Signed-off-by: Peter Xu
Reviewed-by: Alistair Popple
Reviewed-by: David Hildenbrand
Reviewed-by: Liam Howlett
Acked-by: Hugh Dickins
Cc: Andrea Arcangeli
Cc: Axel Rasmussen
Cc: Jerome Glisse
Cc: "Kirill A . Shutemov"
Cc: Matthew Wilcox
Cc: Miaohe Lin
Cc: Mike Rapoport
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index cf7b059b57a7..d4ed701e3e5d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3321,20 +3321,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 }
 
 static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
+					    pgoff_t first_index,
+					    pgoff_t last_index,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
 	pgoff_t vba, vea, zba, zea;
 
-	vma_interval_tree_foreach(vma, root,
-			details->first_index, details->last_index) {
-
+	vma_interval_tree_foreach(vma, root, first_index, last_index) {
 		vba = vma->vm_pgoff;
 		vea = vba + vma_pages(vma) - 1;
-		zba = details->first_index;
+		zba = first_index;
 		if (zba < vba)
 			zba = vba;
-		zea = details->last_index;
+		zea = last_index;
 		if (zea > vea)
 			zea = vea;
 
@@ -3360,18 +3360,22 @@ void unmap_mapping_page(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct zap_details details = { };
+	pgoff_t first_index;
+	pgoff_t last_index;
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageTail(page));
 
+	first_index = page->index;
+	last_index = page->index + thp_nr_pages(page) - 1;
+
 	details.check_mapping = mapping;
-	details.first_index = page->index;
-	details.last_index = page->index + thp_nr_pages(page) - 1;
 	details.single_page = page;
 
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+					 last_index, &details);
 	i_mmap_unlock_write(mapping);
 }
 
@@ -3391,16 +3395,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 		pgoff_t nr, bool even_cows)
 {
 	struct zap_details details = { };
+	pgoff_t first_index = start;
+	pgoff_t last_index = start + nr - 1;
 
 	details.check_mapping = even_cows ? NULL : mapping;
-	details.first_index = start;
-	details.last_index = start + nr - 1;
-	if (details.last_index < details.first_index)
-		details.last_index = ULONG_MAX;
+	if (last_index < first_index)
+		last_index = ULONG_MAX;
 
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+					 last_index, &details);
 	i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL_GPL(unmap_mapping_pages);
--
cgit
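After the change, the interval logic lives entirely in
unmap_mapping_range_tree(): clamp the requested range [first_index,
last_index] to each VMA's span [vba, vea], with unmap_mapping_pages()
separately guarding a wrapped range by pushing last_index to ULONG_MAX.
A standalone sketch of just that clamping arithmetic (struct range and
clamp_range() are illustrative names, not kernel API):

#include <stdio.h>

struct range {
	unsigned long start, end;	/* inclusive page offsets */
};

/* Clip the requested zap range to one VMA's file range, as the loop does. */
static struct range clamp_range(struct range zap, struct range vma)
{
	struct range r = zap;

	if (r.start < vma.start)
		r.start = vma.start;	/* zba = max(first_index, vba) */
	if (r.end > vma.end)
		r.end = vma.end;	/* zea = min(last_index, vea) */
	return r;
}

int main(void)
{
	struct range vma = { 100, 199 };	/* vm_pgoff .. vm_pgoff + pages - 1 */
	struct range zap = { 150, 400 };
	struct range r = clamp_range(zap, vma);

	printf("zap pages %lu..%lu of this vma\n", r.start, r.end);
	return 0;
}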
From 91b61ef333cf43f96b3522a086c9ac925763d6e5 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 5 Nov 2021 13:38:34 -0700
Subject: mm: add zap_skip_check_mapping() helper

Use the helper for the checks.  Rename "check_mapping" into "zap_mapping"
because "check_mapping" looks like a bool but in fact it stores the
mapping itself.  When it's set, we check the mapping (it must be
non-NULL).  When it's cleared we skip the check, which works like the old
way.

Move the duplicated comments to the helper too.

Link: https://lkml.kernel.org/r/20210915181538.11288-1-peterx@redhat.com
Signed-off-by: Peter Xu
Reviewed-by: Alistair Popple
Cc: Andrea Arcangeli
Cc: Axel Rasmussen
Cc: David Hildenbrand
Cc: Hugh Dickins
Cc: Jerome Glisse
Cc: "Kirill A . Shutemov"
Cc: Liam Howlett
Cc: Matthew Wilcox
Cc: Miaohe Lin
Cc: Mike Rapoport
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index d4ed701e3e5d..48b2e048d267 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1333,16 +1333,8 @@ again:
 			struct page *page;
 
 			page = vm_normal_page(vma, addr, ptent);
-			if (unlikely(details) && page) {
-				/*
-				 * unmap_shared_mapping_pages() wants to
-				 * invalidate cache without truncating:
-				 * unmap shared but keep private pages.
-				 */
-				if (details->check_mapping &&
-				    details->check_mapping != page_rmapping(page))
-					continue;
-			}
+			if (unlikely(zap_skip_check_mapping(details, page)))
+				continue;
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
@@ -1375,17 +1367,8 @@ again:
 		    is_device_exclusive_entry(entry)) {
 			struct page *page = pfn_swap_entry_to_page(entry);
 
-			if (unlikely(details && details->check_mapping)) {
-				/*
-				 * unmap_shared_mapping_pages() wants to
-				 * invalidate cache without truncating:
-				 * unmap shared but keep private pages.
-				 */
-				if (details->check_mapping !=
-				    page_rmapping(page))
-					continue;
-			}
-
+			if (unlikely(zap_skip_check_mapping(details, page)))
+				continue;
 			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 			rss[mm_counter(page)]--;
 
@@ -3369,7 +3352,7 @@ void unmap_mapping_page(struct page *page)
 	first_index = page->index;
 	last_index = page->index + thp_nr_pages(page) - 1;
 
-	details.check_mapping = mapping;
+	details.zap_mapping = mapping;
 	details.single_page = page;
 
 	i_mmap_lock_write(mapping);
@@ -3398,7 +3381,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 	pgoff_t first_index = start;
 	pgoff_t last_index = start + nr - 1;
 
-	details.check_mapping = even_cows ? NULL : mapping;
+	details.zap_mapping = even_cows ? NULL : mapping;
 	if (last_index < first_index)
 		last_index = ULONG_MAX;
--
cgit
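The definition of zap_skip_check_mapping() itself sits outside the
mm/memory.c hunks shown above, so the following is only a sketch of the check
the two removed blocks shared: skip the page when a zap_mapping filter is set
and the page belongs to a different mapping.  The struct definitions and the
direct comparison against page->mapping below are userspace stand-ins (the
kernel goes through page_rmapping()), not the real helper.

#include <stdbool.h>
#include <stdio.h>

struct mapping { int id; };
struct page { struct mapping *mapping; };
struct zap_details { struct mapping *zap_mapping; };

/* Skip zapping iff a mapping filter is set and this page belongs elsewhere. */
static bool zap_skip_check_mapping(struct zap_details *details, struct page *page)
{
	if (!details || !details->zap_mapping || !page)
		return false;
	return details->zap_mapping != page->mapping;
}

int main(void)
{
	struct mapping file = { 1 }, anon = { 2 };
	struct page file_page = { &file }, anon_page = { &anon };
	struct zap_details details = { .zap_mapping = &file };

	/* The file page is zapped (not skipped); the anon page is kept. */
	printf("skip file page: %d\n", zap_skip_check_mapping(&details, &file_page));
	printf("skip anon page: %d\n", zap_skip_check_mapping(&details, &anon_page));
	return 0;
}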
From 03c4f20454e0231d2cdec4373841a3a25cf4efed Mon Sep 17 00:00:00 2001
From: Qi Zheng
Date: Fri, 5 Nov 2021 13:38:38 -0700
Subject: mm: introduce pmd_install() helper

Patch series "Do some code cleanups related to mm", v3.

This patch (of 2):

Currently we have three times the same few lines repeated in the code.
Deduplicate them by newly introduced pmd_install() helper.

Link: https://lkml.kernel.org/r/20210901102722.47686-1-zhengqi.arch@bytedance.com
Link: https://lkml.kernel.org/r/20210901102722.47686-2-zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng
Reviewed-by: David Hildenbrand
Reviewed-by: Muchun Song
Acked-by: Kirill A. Shutemov
Cc: Thomas Gleixner
Cc: Johannes Weiner
Cc: Michal Hocko
Cc: Vladimir Davydov
Cc: Mika Penttila
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 48b2e048d267..38d63f6d998b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -433,9 +433,20 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	}
 }
 
+void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
+{
+	spinlock_t *ptl = pmd_lock(mm, pmd);
+
+	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
+		mm_inc_nr_ptes(mm);
+		pmd_populate(mm, pmd, *pte);
+		*pte = NULL;
+	}
+	spin_unlock(ptl);
+}
+
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 {
-	spinlock_t *ptl;
 	pgtable_t new = pte_alloc_one(mm);
 	if (!new)
 		return -ENOMEM;
@@ -455,13 +466,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 	 */
 	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
-	ptl = pmd_lock(mm, pmd);
-	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
-		mm_inc_nr_ptes(mm);
-		pmd_populate(mm, pmd, new);
-		new = NULL;
-	}
-	spin_unlock(ptl);
+	pmd_install(mm, pmd, &new);
 	if (new)
 		pte_free(mm, new);
 	return 0;
@@ -4024,17 +4029,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
 				return ret;
 		}
 
-		if (vmf->prealloc_pte) {
-			vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
-			if (likely(pmd_none(*vmf->pmd))) {
-				mm_inc_nr_ptes(vma->vm_mm);
-				pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
-				vmf->prealloc_pte = NULL;
-			}
-			spin_unlock(vmf->ptl);
-		} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
+		if (vmf->prealloc_pte)
+			pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte);
+		else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
 			return VM_FAULT_OOM;
-		}
 	}
 
 	/* See comment in handle_pte_fault() */
--
cgit
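pmd_install() captures the allocate-outside-the-lock, install-under-the-lock
idiom: whichever caller wins the race has its preallocated table consumed and
its pointer cleared, and every caller frees whatever it still holds
afterwards.  Below is a userspace sketch of the same idiom with a pthread
mutex standing in for the pmd lock; slot_install() and the surrounding names
are invented for the example and are not kernel API.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int *slot;			/* stand-in for the pmd entry */

/* Install *newp into the slot unless someone else already did. */
static void slot_install(int **newp)
{
	pthread_mutex_lock(&lock);
	if (!slot) {			/* has another populated it? */
		slot = *newp;
		*newp = NULL;		/* consumed: caller must not free it */
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	int *new = malloc(sizeof(*new));

	slot_install(&new);
	if (new)			/* lost the race: free the spare copy */
		free(new);
	printf("slot %s populated\n", slot ? "is" : "is not");
	free(slot);
	return 0;
}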
From ed33b5a677da33d6e8f959879bb61e9791b80354 Mon Sep 17 00:00:00 2001
From: Qi Zheng
Date: Fri, 5 Nov 2021 13:38:41 -0700
Subject: mm: remove redundant smp_wmb()

The smp_wmb() which is in the __pte_alloc() is used to ensure all ptes
setup is visible before the pte is made visible to other CPUs by being
put into page tables.  We only need this when the pte is actually
populated, so move it to pmd_install().

__pte_alloc_kernel(), __p4d_alloc(), __pud_alloc() and __pmd_alloc() are
similar to this case.

We can also defer smp_wmb() to the place where the pmd entry is really
populated by preallocated pte.  There are two kinds of user of
preallocated pte, one is filemap & finish_fault(), another is THP.  The
former does not need another smp_wmb() because the smp_wmb() has been
done by pmd_install().  Fortunately, the latter also does not need
another smp_wmb() because there is already a smp_wmb() before populating
the new pte when the THP uses a preallocated pte to split a huge pmd.

Link: https://lkml.kernel.org/r/20210901102722.47686-3-zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng
Reviewed-by: Muchun Song
Acked-by: David Hildenbrand
Acked-by: Kirill A. Shutemov
Cc: Johannes Weiner
Cc: Michal Hocko
Cc: Mika Penttila
Cc: Thomas Gleixner
Cc: Vladimir Davydov
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 52 +++++++++++++++++++++++-----------------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 38d63f6d998b..5db36280950a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -439,6 +439,20 @@ void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
 
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		mm_inc_nr_ptes(mm);
+		/*
+		 * Ensure all pte setup (eg. pte page lock and page clearing) are
+		 * visible before the pte is made visible to other CPUs by being
+		 * put into page tables.
+		 *
+		 * The other side of the story is the pointer chasing in the page
+		 * table walking code (when walking the page table without locking;
+		 * ie. most of the time). Fortunately, these data accesses consist
+		 * of a chain of data-dependent loads, meaning most CPUs (alpha
+		 * being the notable exception) will already guarantee loads are
+		 * seen in-order. See the alpha page table accessors for the
+		 * smp_rmb() barriers in page table walking code.
+		 */
+		smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 		pmd_populate(mm, pmd, *pte);
 		*pte = NULL;
 	}
@@ -451,21 +465,6 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 	if (!new)
 		return -ENOMEM;
 
-	/*
-	 * Ensure all pte setup (eg. pte page lock and page clearing) are
-	 * visible before the pte is made visible to other CPUs by being
-	 * put into page tables.
-	 *
-	 * The other side of the story is the pointer chasing in the page
-	 * table walking code (when walking the page table without locking;
-	 * ie. most of the time). Fortunately, these data accesses consist
-	 * of a chain of data-dependent loads, meaning most CPUs (alpha
-	 * being the notable exception) will already guarantee loads are
-	 * seen in-order. See the alpha page table accessors for the
-	 * smp_rmb() barriers in page table walking code.
-	 */
-	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
-
 	pmd_install(mm, pmd, &new);
 	if (new)
 		pte_free(mm, new);
@@ -478,10 +477,9 @@ int __pte_alloc_kernel(pmd_t *pmd)
 	if (!new)
 		return -ENOMEM;
 
-	smp_wmb(); /* See comment in __pte_alloc */
-
 	spin_lock(&init_mm.page_table_lock);
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
+		smp_wmb(); /* See comment in pmd_install() */
 		pmd_populate_kernel(&init_mm, pmd, new);
 		new = NULL;
 	}
@@ -3845,7 +3843,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
 		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
-		smp_wmb(); /* See comment in __pte_alloc() */
 	}
 
 	ret = vma->vm_ops->fault(vmf);
@@ -3916,7 +3913,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 		vmf->prealloc_pte = pte_alloc_one(vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
-		smp_wmb(); /* See comment in __pte_alloc() */
 	}
 
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -4141,7 +4137,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
 		if (!vmf->prealloc_pte)
 			return VM_FAULT_OOM;
-		smp_wmb(); /* See comment in __pte_alloc() */
 	}
 
 	return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
@@ -4815,13 +4810,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
-	smp_wmb(); /* See comment in __pte_alloc */
-
 	spin_lock(&mm->page_table_lock);
-	if (pgd_present(*pgd))		/* Another has populated it */
+	if (pgd_present(*pgd)) {	/* Another has populated it */
 		p4d_free(mm, new);
-	else
+	} else {
+		smp_wmb(); /* See comment in pmd_install() */
 		pgd_populate(mm, pgd, new);
+	}
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
@@ -4838,11 +4833,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
-	smp_wmb(); /* See comment in __pte_alloc */
-
 	spin_lock(&mm->page_table_lock);
 	if (!p4d_present(*p4d)) {
 		mm_inc_nr_puds(mm);
+		smp_wmb(); /* See comment in pmd_install() */
 		p4d_populate(mm, p4d, new);
 	} else	/* Another has populated it */
 		pud_free(mm, new);
@@ -4863,14 +4857,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
-	smp_wmb(); /* See comment in __pte_alloc */
-
 	ptl = pud_lock(mm, pud);
 	if (!pud_present(*pud)) {
 		mm_inc_nr_pmds(mm);
+		smp_wmb(); /* See comment in pmd_install() */
 		pud_populate(mm, pud, new);
-	} else /* Another has populated it */
+	} else {	/* Another has populated it */
 		pmd_free(mm, new);
+	}
 	spin_unlock(ptl);
 	return 0;
 }
--
cgit
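The barrier being consolidated is the usual publication pattern: fully
initialise the new page table, issue a write barrier, and only then store the
pointer that makes it reachable; lockless walkers rely on the dependent load
on their side.  A userspace sketch of that pattern follows, using C11
release/consume ordering as a stand-in for smp_wmb() and the dependent loads;
table_install(), table_walk() and struct pte_table are invented names for the
illustration.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pte_table {
	int entries[512];
};

static _Atomic(struct pte_table *) pmd;	/* stand-in for the pmd entry */

static void table_install(void)
{
	struct pte_table *new = calloc(1, sizeof(*new));	/* "pte setup"... */

	if (!new)
		return;
	new->entries[0] = 42;					/* ...all done */
	/* Release store plays the role of smp_wmb() before pmd_populate(). */
	atomic_store_explicit(&pmd, new, memory_order_release);
}

static void table_walk(void)
{
	/* Dependent load, as in lockless page table walking. */
	struct pte_table *t = atomic_load_explicit(&pmd, memory_order_consume);

	if (t)
		printf("entry 0 = %d\n", t->entries[0]);
}

int main(void)
{
	table_install();
	table_walk();
	free(atomic_load_explicit(&pmd, memory_order_relaxed));
	return 0;
}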