Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 171 |
1 file changed, 79 insertions, 92 deletions
diff --git a/mm/memory.c b/mm/memory.c
index b0cda5aab398..0ba4f6b71847 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,7 +57,6 @@
 #include <linux/export.h>
 #include <linux/delayacct.h>
 #include <linux/init.h>
-#include <linux/pfn_t.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
@@ -125,6 +124,24 @@ int randomize_va_space __read_mostly =
 					2;
 #endif
 
+static const struct ctl_table mmu_sysctl_table[] = {
+	{
+		.procname	= "randomize_va_space",
+		.data		= &randomize_va_space,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+};
+
+static int __init init_mm_sysctl(void)
+{
+	register_sysctl_init("kernel", mmu_sysctl_table);
+	return 0;
+}
+
+subsys_initcall(init_mm_sysctl);
+
 #ifndef arch_wants_old_prefaulted_pte
 static inline bool arch_wants_old_prefaulted_pte(void)
 {
@@ -380,32 +397,26 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 			vma_start_write(vma);
 		unlink_anon_vmas(vma);
 
-		if (is_vm_hugetlb_page(vma)) {
-			unlink_file_vma(vma);
-			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
-				floor, next ? next->vm_start : ceiling);
-		} else {
-			unlink_file_vma_batch_init(&vb);
-			unlink_file_vma_batch_add(&vb, vma);
+		unlink_file_vma_batch_init(&vb);
+		unlink_file_vma_batch_add(&vb, vma);
 
-			/*
-			 * Optimization: gather nearby vmas into one call down
-			 */
-			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
-			       && !is_vm_hugetlb_page(next)) {
-				vma = next;
-				next = mas_find(mas, ceiling - 1);
-				if (unlikely(xa_is_zero(next)))
-					next = NULL;
-				if (mm_wr_locked)
-					vma_start_write(vma);
-				unlink_anon_vmas(vma);
-				unlink_file_vma_batch_add(&vb, vma);
-			}
-			unlink_file_vma_batch_final(&vb);
-			free_pgd_range(tlb, addr, vma->vm_end,
-				floor, next ? next->vm_start : ceiling);
+		/*
+		 * Optimization: gather nearby vmas into one call down
+		 */
+		while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
+			vma = next;
+			next = mas_find(mas, ceiling - 1);
+			if (unlikely(xa_is_zero(next)))
+				next = NULL;
+			if (mm_wr_locked)
+				vma_start_write(vma);
+			unlink_anon_vmas(vma);
+			unlink_file_vma_batch_add(&vb, vma);
 		}
+		unlink_file_vma_batch_final(&vb);
+
+		free_pgd_range(tlb, addr, vma->vm_end,
+			       floor, next ? next->vm_start : ceiling);
 		vma = next;
 	} while (vma);
 }
@@ -598,16 +609,6 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			return NULL;
 		if (is_zero_pfn(pfn))
 			return NULL;
-		if (pte_devmap(pte))
-		/*
-		 * NOTE: New users of ZONE_DEVICE will not set pte_devmap()
-		 * and will have refcounts incremented on their struct pages
-		 * when they are inserted into PTEs, thus they are safe to
-		 * return here. Legacy ZONE_DEVICE pages that set pte_devmap()
-		 * do not have refcounts. Example of legacy ZONE_DEVICE is
-		 * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs drivers.
-		 */
-			return NULL;
 
 		print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
@@ -685,9 +686,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
-	if (pmd_devmap(pmd))
-		return NULL;
-	if (is_huge_zero_pmd(pmd))
+	if (is_huge_zero_pfn(pfn))
 		return NULL;
 	if (unlikely(pfn > highest_memmap_pfn))
 		return NULL;
@@ -797,7 +796,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *dst_vma,
 		struct vm_area_struct *src_vma, unsigned long addr, int *rss)
 {
-	unsigned long vm_flags = dst_vma->vm_flags;
+	vm_flags_t vm_flags = dst_vma->vm_flags;
 	pte_t orig_pte = ptep_get(src_pte);
 	pte_t pte = orig_pte;
 	struct folio *folio;
@@ -985,10 +984,9 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 		 pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr,
 		 int max_nr, int *rss, struct folio **prealloc)
 {
+	fpb_t flags = FPB_MERGE_WRITE;
 	struct page *page;
 	struct folio *folio;
-	bool any_writable;
-	fpb_t flags = 0;
 	int err, nr;
 
 	page = vm_normal_page(src_vma, addr, pte);
@@ -1003,13 +1001,12 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 	 * by keeping the batching logic separate.
 	 */
 	if (unlikely(!*prealloc && folio_test_large(folio) && max_nr != 1)) {
-		if (src_vma->vm_flags & VM_SHARED)
-			flags |= FPB_IGNORE_DIRTY;
-		if (!vma_soft_dirty_enabled(src_vma))
-			flags |= FPB_IGNORE_SOFT_DIRTY;
+		if (!(src_vma->vm_flags & VM_SHARED))
+			flags |= FPB_RESPECT_DIRTY;
+		if (vma_soft_dirty_enabled(src_vma))
+			flags |= FPB_RESPECT_SOFT_DIRTY;
 
-		nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
-				     &any_writable, NULL, NULL);
+		nr = folio_pte_batch_flags(folio, src_vma, src_pte, &pte, max_nr, flags);
 		folio_ref_add(folio, nr);
 		if (folio_test_anon(folio)) {
 			if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page,
@@ -1023,8 +1020,6 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 			folio_dup_file_rmap_ptes(folio, page, nr, dst_vma);
 			rss[mm_counter_file(folio)] += nr;
 		}
-		if (any_writable)
-			pte = pte_mkwrite(pte, src_vma);
 		__copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte, pte,
 				    addr, nr);
 		return nr;
@@ -1250,8 +1245,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	src_pmd = pmd_offset(src_pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)
-			|| pmd_devmap(*src_pmd)) {
+		if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)) {
 			int err;
 			VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma);
 			err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd,
@@ -1287,7 +1281,7 @@ copy_pud_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	src_pud = pud_offset(src_p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
+		if (pud_trans_huge(*src_pud)) {
 			int err;
 
 			VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma);
@@ -1549,7 +1543,6 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
 		struct zap_details *details, int *rss, bool *force_flush,
 		bool *force_break, bool *any_skipped)
 {
-	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
 	struct mm_struct *mm = tlb->mm;
 	struct folio *folio;
 	struct page *page;
@@ -1579,9 +1572,7 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
 	 * by keeping the batching logic separate.
 	 */
 	if (unlikely(folio_test_large(folio) && max_nr != 1)) {
-		nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
-				     NULL, NULL, NULL);
-
+		nr = folio_pte_batch(folio, pte, ptent, max_nr);
 		zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
 				       addr, details, rss, force_flush,
 				       force_break, any_skipped);
@@ -1801,7 +1792,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
+		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				__split_huge_pmd(vma, pmd, addr, false);
 			else if (zap_huge_pmd(tlb, vma, pmd, addr)) {
@@ -1843,7 +1834,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
 	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
+		if (pud_trans_huge(*pud)) {
 			if (next - addr != HPAGE_PUD_SIZE) {
 				mmap_assert_locked(tlb->mm);
 				split_huge_pud(vma, pud, addr);
@@ -2448,7 +2439,7 @@ int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
 EXPORT_SYMBOL(vm_map_pages_zero);
 
 static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-			pfn_t pfn, pgprot_t prot, bool mkwrite)
+			unsigned long pfn, pgprot_t prot, bool mkwrite)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, entry;
@@ -2470,7 +2461,7 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			 * allocation and mapping invalidation so just skip the
 			 * update.
 			 */
-			if (pte_pfn(entry) != pfn_t_to_pfn(pfn)) {
+			if (pte_pfn(entry) != pfn) {
 				WARN_ON_ONCE(!is_zero_pfn(pte_pfn(entry)));
 				goto out_unlock;
 			}
@@ -2483,10 +2474,7 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	}
 
 	/* Ok, finally just insert the thing.. */
-	if (pfn_t_devmap(pfn))
-		entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
-	else
-		entry = pte_mkspecial(pfn_t_pte(pfn, prot));
+	entry = pte_mkspecial(pfn_pte(pfn, prot));
 
 	if (mkwrite) {
 		entry = pte_mkyoung(entry);
@@ -2557,8 +2545,7 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 
 	pfnmap_setup_cachemode_pfn(pfn, &pgprot);
 
-	return insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
-			false);
+	return insert_pfn(vma, addr, pfn, pgprot, false);
 }
 EXPORT_SYMBOL(vmf_insert_pfn_prot);
 
@@ -2589,25 +2576,22 @@ vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vmf_insert_pfn);
 
-static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn, bool mkwrite)
+static bool vm_mixed_ok(struct vm_area_struct *vma, unsigned long pfn,
+			bool mkwrite)
 {
-	if (unlikely(is_zero_pfn(pfn_t_to_pfn(pfn))) &&
+	if (unlikely(is_zero_pfn(pfn)) &&
 	    (mkwrite || !vm_mixed_zeropage_allowed(vma)))
 		return false;
 	/* these checks mirror the abort conditions in vm_normal_page */
 	if (vma->vm_flags & VM_MIXEDMAP)
 		return true;
-	if (pfn_t_devmap(pfn))
-		return true;
-	if (pfn_t_special(pfn))
-		return true;
-	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
+	if (is_zero_pfn(pfn))
 		return true;
 	return false;
 }
 
 static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
-		unsigned long addr, pfn_t pfn, bool mkwrite)
+		unsigned long addr, unsigned long pfn, bool mkwrite)
 {
 	pgprot_t pgprot = vma->vm_page_prot;
 	int err;
@@ -2618,9 +2602,9 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return VM_FAULT_SIGBUS;
 
-	pfnmap_setup_cachemode_pfn(pfn_t_to_pfn(pfn), &pgprot);
+	pfnmap_setup_cachemode_pfn(pfn, &pgprot);
 
-	if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+	if (!pfn_modify_allowed(pfn, pgprot))
 		return VM_FAULT_SIGBUS;
 
 	/*
@@ -2630,8 +2614,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
 	 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
 	 * without pte special, it would there be refcounted as a normal page.
 	 */
-	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) &&
-			!pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pfn_valid(pfn)) {
 		struct page *page;
 
 		/*
@@ -2639,7 +2622,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
 		 * regardless of whether the caller specified flags that
 		 * result in pfn_t_has_page() == false.
 		 */
-		page = pfn_to_page(pfn_t_to_pfn(pfn));
+		page = pfn_to_page(pfn);
 		err = insert_page(vma, addr, page, pgprot, mkwrite);
 	} else {
 		return insert_pfn(vma, addr, pfn, pgprot, mkwrite);
@@ -2674,7 +2657,7 @@ vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page,
 EXPORT_SYMBOL_GPL(vmf_insert_page_mkwrite);
 
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-		pfn_t pfn)
+		unsigned long pfn)
 {
 	return __vm_insert_mixed(vma, addr, pfn, false);
 }
@@ -2686,7 +2669,7 @@ EXPORT_SYMBOL(vmf_insert_mixed);
  *  the same entry was actually inserted.
  */
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
-		unsigned long addr, pfn_t pfn)
+		unsigned long addr, unsigned long pfn)
 {
 	return __vm_insert_mixed(vma, addr, pfn, true);
 }
@@ -5405,10 +5388,10 @@ fallback:
 
 	/*
 	 * Using per-page fault to maintain the uffd semantics, and same
-	 * approach also applies to non-anonymous-shmem faults to avoid
+	 * approach also applies to non shmem/tmpfs faults to avoid
 	 * inflating the RSS of the process.
 	 */
-	if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma)) ||
+	if (!vma_is_shmem(vma) || unlikely(userfaultfd_armed(vma)) ||
 	    unlikely(needs_fallback)) {
 		nr_pages = 1;
 	} else if (nr_pages > 1) {
@@ -6128,7 +6111,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		.gfp_mask = __get_fault_gfp_mask(vma),
 	};
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long vm_flags = vma->vm_flags;
+	vm_flags_t vm_flags = vma->vm_flags;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	vm_fault_t ret;
@@ -6152,7 +6135,7 @@ retry_pud:
 		pud_t orig_pud = *vmf.pud;
 
 		barrier();
-		if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
+		if (pud_trans_huge(orig_pud)) {
 
 			/*
 			 * TODO once we support anonymous PUDs: NUMA case and
@@ -6193,7 +6176,7 @@ retry_pud:
 			pmd_migration_entry_wait(mm, vmf.pmd);
 			return 0;
 		}
-		if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
+		if (pmd_trans_huge(vmf.orig_pmd)) {
 			if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
 				return do_huge_pmd_numa_page(&vmf);
 
@@ -6720,6 +6703,7 @@ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
 	while (len) {
 		int bytes, offset;
 		void *maddr;
+		struct folio *folio;
 		struct vm_area_struct *vma = NULL;
 		struct page *page = get_user_page_vma_remote(mm, addr,
 							     gup_flags, &vma);
@@ -6751,21 +6735,22 @@ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
 			if (bytes <= 0)
 				break;
 		} else {
+			folio = page_folio(page);
 			bytes = len;
 			offset = addr & (PAGE_SIZE-1);
 			if (bytes > PAGE_SIZE-offset)
 				bytes = PAGE_SIZE-offset;
 
-			maddr = kmap_local_page(page);
+			maddr = kmap_local_folio(folio, folio_page_idx(folio, page) * PAGE_SIZE);
 			if (write) {
 				copy_to_user_page(vma, page, addr,
 						  maddr + offset, buf, bytes);
-				set_page_dirty_lock(page);
+				folio_mark_dirty_lock(folio);
 			} else {
 				copy_from_user_page(vma, page, addr,
 						    buf, maddr + offset, bytes);
 			}
-			unmap_and_put_page(page, maddr);
+			folio_release_kmap(folio, maddr);
 		}
 		len -= bytes;
 		buf += bytes;
@@ -6844,6 +6829,7 @@ static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
 	while (len) {
 		int bytes, offset, retval;
 		void *maddr;
+		struct folio *folio;
 		struct page *page;
 		struct vm_area_struct *vma = NULL;
 
@@ -6859,17 +6845,18 @@ static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
 			goto out;
 		}
 
+		folio = page_folio(page);
 		bytes = len;
 		offset = addr & (PAGE_SIZE - 1);
 		if (bytes > PAGE_SIZE - offset)
 			bytes = PAGE_SIZE - offset;
 
-		maddr = kmap_local_page(page);
+		maddr = kmap_local_folio(folio, folio_page_idx(folio, page) * PAGE_SIZE);
 		retval = strscpy(buf, maddr + offset, bytes);
 		if (retval >= 0) {
 			/* Found the end of the string */
 			buf += retval;
-			unmap_and_put_page(page, maddr);
+			folio_release_kmap(folio, maddr);
 			break;
 		}
 
@@ -6887,7 +6874,7 @@ static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
 		}
 		len -= bytes;
 
-		unmap_and_put_page(page, maddr);
+		folio_release_kmap(folio, maddr);
 	}
 
 out: