Diffstat (limited to 'mm')
-rw-r--r--  mm/damon/vaddr.c     |  8
-rw-r--r--  mm/huge_memory.c     | 15
-rw-r--r--  mm/hugetlb.c         |  2
-rw-r--r--  mm/khugepaged.c      |  5
-rw-r--r--  mm/kmsan/hooks.c     |  3
-rw-r--r--  mm/ksm.c             | 27
-rw-r--r--  mm/madvise.c         |  4
-rw-r--r--  mm/memblock.c        |  4
-rw-r--r--  mm/memcontrol.c      |  7
-rw-r--r--  mm/memory_hotplug.c  | 32
-rw-r--r--  mm/migrate.c         | 23
-rw-r--r--  mm/slub.c            | 98
-rw-r--r--  mm/util.c            |  3
13 files changed, 138 insertions(+), 93 deletions(-)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 8c048f9b129e..7e834467b2d8 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -328,10 +328,8 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
 	}
 
 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-	if (!pte) {
-		walk->action = ACTION_AGAIN;
+	if (!pte)
 		return 0;
-	}
 	if (!pte_present(ptep_get(pte)))
 		goto out;
 	damon_ptep_mkold(pte, walk->vma, addr);
@@ -481,10 +479,8 @@ regular_page:
 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
 
 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-	if (!pte) {
-		walk->action = ACTION_AGAIN;
+	if (!pte)
 		return 0;
-	}
 	ptent = ptep_get(pte);
 	if (!pte_present(ptent))
 		goto out;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5acca24bbabb..1b81680b4225 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4104,32 +4104,23 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
 static bool thp_underused(struct folio *folio)
 {
 	int num_zero_pages = 0, num_filled_pages = 0;
-	void *kaddr;
 	int i;
 
 	if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1)
 		return false;
 
 	for (i = 0; i < folio_nr_pages(folio); i++) {
-		kaddr = kmap_local_folio(folio, i * PAGE_SIZE);
-		if (!memchr_inv(kaddr, 0, PAGE_SIZE)) {
-			num_zero_pages++;
-			if (num_zero_pages > khugepaged_max_ptes_none) {
-				kunmap_local(kaddr);
+		if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) {
+			if (++num_zero_pages > khugepaged_max_ptes_none)
 				return true;
-			}
 		} else {
 			/*
 			 * Another path for early exit once the number
 			 * of non-zero filled pages exceeds threshold.
 			 */
-			num_filled_pages++;
-			if (num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none) {
-				kunmap_local(kaddr);
+			if (++num_filled_pages >= HPAGE_PMD_NR - khugepaged_max_ptes_none)
 				return false;
-			}
 		}
-		kunmap_local(kaddr);
 	}
 	return false;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6cac826cb61f..795ee393eac0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7222,6 +7222,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 				psize);
 		}
 		spin_unlock(ptl);
+
+		cond_resched();
 	}
 	/*
 	 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7ab2d1a42df3..abe54f0043c7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -376,10 +376,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
 
 int __init khugepaged_init(void)
 {
-	mm_slot_cache = kmem_cache_create("khugepaged_mm_slot",
-					  sizeof(struct mm_slot),
-					  __alignof__(struct mm_slot),
-					  0, NULL);
+	mm_slot_cache = KMEM_CACHE(mm_slot, 0);
 	if (!mm_slot_cache)
 		return -ENOMEM;
 
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 90bee565b9bc..2cee59d89c80 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -339,13 +339,12 @@ static void kmsan_handle_dma_page(const void *addr, size_t size,
 void kmsan_handle_dma(phys_addr_t phys, size_t size,
 		      enum dma_data_direction dir)
 {
-	struct page *page = phys_to_page(phys);
 	u64 page_offset, to_go;
 	void *addr;
 
 	if (PhysHighMem(phys))
 		return;
-	addr = page_to_virt(page);
+	addr = phys_to_virt(phys);
 	/*
 	 * The kernel may occasionally give us adjacent DMA pages not belonging
 	 * to the same allocation. Process them separately to avoid triggering
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2921,7 +2921,7 @@ int __ksm_enter(struct mm_struct *mm)
 
 void __ksm_exit(struct mm_struct *mm)
 {
-	struct ksm_mm_slot *mm_slot;
+	struct ksm_mm_slot *mm_slot = NULL;
 	struct mm_slot *slot;
 	int easy_to_free = 0;
 
@@ -2936,19 +2936,20 @@ void __ksm_exit(struct mm_struct *mm)
 	spin_lock(&ksm_mmlist_lock);
 	slot = mm_slot_lookup(mm_slots_hash, mm);
-	if (slot) {
-		mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
-		if (ksm_scan.mm_slot != mm_slot) {
-			if (!mm_slot->rmap_list) {
-				hash_del(&slot->hash);
-				list_del(&slot->mm_node);
-				easy_to_free = 1;
-			} else {
-				list_move(&slot->mm_node,
-					  &ksm_scan.mm_slot->slot.mm_node);
-			}
-		}
-	}
+	if (!slot)
+		goto unlock;
+	mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
+	if (ksm_scan.mm_slot == mm_slot)
+		goto unlock;
+	if (!mm_slot->rmap_list) {
+		hash_del(&slot->hash);
+		list_del(&slot->mm_node);
+		easy_to_free = 1;
+	} else {
+		list_move(&slot->mm_node,
+			  &ksm_scan.mm_slot->slot.mm_node);
 	}
+unlock:
 	spin_unlock(&ksm_mmlist_lock);
 
 	if (easy_to_free) {
diff --git a/mm/madvise.c b/mm/madvise.c
index 35ed4ab0d7c5..fb1c86e630b6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1071,8 +1071,8 @@ static bool is_valid_guard_vma(struct vm_area_struct *vma, bool allow_locked)
 
 static bool is_guard_pte_marker(pte_t ptent)
 {
-	return is_pte_marker(ptent) &&
-	       is_guard_swp_entry(pte_to_swp_entry(ptent));
+	return is_swap_pte(ptent) &&
+	       is_guard_swp_entry(pte_to_swp_entry(ptent));
 }
 
 static int guard_install_pud_entry(pud_t *pud, unsigned long addr,
diff --git a/mm/memblock.c b/mm/memblock.c
index 120a501a887a..e23e16618e9b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2452,8 +2452,10 @@ static int reserve_mem_kho_finalize(struct kho_serialization *ser)
 
 	for (i = 0; i < reserved_mem_count; i++) {
 		struct reserve_mem_table *map = &reserved_mem_table[i];
+		struct page *page = phys_to_page(map->start);
+		unsigned int nr_pages = map->size >> PAGE_SHIFT;
 
-		err |= kho_preserve_phys(map->start, map->size);
+		err |= kho_preserve_pages(page, nr_pages);
 	}
 
 	err |= kho_preserve_folio(page_folio(kho_fdt));
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e090f29eb03b..4deda33625f4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2307,12 +2307,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	bool drained = false;
 	bool raised_max_event = false;
 	unsigned long pflags;
+	bool allow_spinning = gfpflags_allow_spinning(gfp_mask);
 
 retry:
 	if (consume_stock(memcg, nr_pages))
 		return 0;
 
-	if (!gfpflags_allow_spinning(gfp_mask))
+	if (!allow_spinning)
 		/* Avoid the refill and flush of the older stock */
 		batch = nr_pages;
 
@@ -2348,7 +2349,7 @@ retry:
 	if (!gfpflags_allow_blocking(gfp_mask))
 		goto nomem;
 
-	memcg_memory_event(mem_over_limit, MEMCG_MAX);
+	__memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
 	raised_max_event = true;
 
 	psi_memstall_enter(&pflags);
@@ -2415,7 +2416,7 @@ force:
 	 * a MEMCG_MAX event.
 	 */
 	if (!raised_max_event)
-		memcg_memory_event(mem_over_limit, MEMCG_MAX);
+		__memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
 
 	/*
 	 * The allocation either can't fail or will lead to more memory
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e9f14de4a9c9..0be83039c3b5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1477,7 +1477,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
 		}
 
 		/* create memory block devices after memory was added */
-		ret = create_memory_block_devices(cur_start, memblock_size,
+		ret = create_memory_block_devices(cur_start, memblock_size, nid,
 						  params.altmap, group);
 		if (ret) {
 			arch_remove_memory(cur_start, memblock_size, NULL);
@@ -1539,8 +1539,16 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 
 	ret = __try_online_node(nid, false);
 	if (ret < 0)
-		goto error;
-	new_node = ret;
+		goto error_memblock_remove;
+	if (ret) {
+		node_set_online(nid);
+		ret = register_one_node(nid);
+		if (WARN_ON(ret)) {
+			node_set_offline(nid);
+			goto error_memblock_remove;
+		}
+		new_node = true;
+	}
 
 	/*
 	 * Self hosted memmap array
@@ -1556,24 +1564,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 			goto error;
 
 		/* create memory block devices after memory was added */
-		ret = create_memory_block_devices(start, size, NULL, group);
+		ret = create_memory_block_devices(start, size, nid, NULL, group);
 		if (ret) {
 			arch_remove_memory(start, size, params.altmap);
 			goto error;
 		}
 	}
 
-	if (new_node) {
-		/* If sysfs file of new node can't be created, cpu on the node
-		 * can't be hot-added. There is no rollback way now.
-		 * So, check by BUG_ON() to catch it reluctantly..
-		 * We online node here. We can't roll back from here.
-		 */
-		node_set_online(nid);
-		ret = register_one_node(nid);
-		BUG_ON(ret);
-	}
-
 	register_memory_blocks_under_node_hotplug(nid, PFN_DOWN(start),
 						  PFN_UP(start + size - 1));
@@ -1597,6 +1594,11 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 
 	return ret;
 error:
+	if (new_node) {
+		node_set_offline(nid);
+		unregister_one_node(nid);
+	}
+error_memblock_remove:
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
 		memblock_remove(start, size);
 error_mem_hotplug_end:
diff --git a/mm/migrate.c b/mm/migrate.c
index aee61a980374..e3065c9edb55 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -296,19 +296,16 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
 }
 
 static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
-					  struct folio *folio,
-					  unsigned long idx)
+					  struct folio *folio, pte_t old_pte, unsigned long idx)
 {
 	struct page *page = folio_page(folio, idx);
-	bool contains_data;
 	pte_t newpte;
-	void *addr;
 
 	if (PageCompound(page))
 		return false;
 	VM_BUG_ON_PAGE(!PageAnon(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	VM_BUG_ON_PAGE(pte_present(ptep_get(pvmw->pte)), page);
+	VM_BUG_ON_PAGE(pte_present(old_pte), page);
 
 	if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
 	    mm_forbids_zeropage(pvmw->vma->vm_mm))
@@ -319,15 +316,17 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
 	 * this subpage has been non present. If the subpage is only zero-filled
 	 * then map it to the shared zeropage.
 	 */
-	addr = kmap_local_page(page);
-	contains_data = memchr_inv(addr, 0, PAGE_SIZE);
-	kunmap_local(addr);
-
-	if (contains_data)
+	if (!pages_identical(page, ZERO_PAGE(0)))
 		return false;
 
 	newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
 					pvmw->vma->vm_page_prot));
+
+	if (pte_swp_soft_dirty(old_pte))
+		newpte = pte_mksoft_dirty(newpte);
+	if (pte_swp_uffd_wp(old_pte))
+		newpte = pte_mkuffd_wp(newpte);
+
 	set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
 
 	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
@@ -370,13 +369,13 @@ static bool remove_migration_pte(struct folio *folio,
 			continue;
 		}
 #endif
+		old_pte = ptep_get(pvmw.pte);
 		if (rmap_walk_arg->map_unused_to_zeropage &&
-		    try_to_map_unused_to_zeropage(&pvmw, folio, idx))
+		    try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
 			continue;
 
 		folio_get(folio);
 		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
-		old_pte = ptep_get(pvmw.pte);
 
 		entry = pte_to_swp_entry(old_pte);
 		if (!is_migration_entry_young(entry))
diff --git a/mm/slub.c b/mm/slub.c
index 584a5ff1828b..a8fcc7e6f25a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -504,10 +504,18 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 	return s->node[node];
 }
 
-/* Get the barn of the current cpu's memory node */
+/*
+ * Get the barn of the current cpu's closest memory node. It may not exist on
+ * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
+ */
 static inline struct node_barn *get_barn(struct kmem_cache *s)
 {
-	return get_node(s, numa_mem_id())->barn;
+	struct kmem_cache_node *n = get_node(s, numa_mem_id());
+
+	if (!n)
+		return NULL;
+
+	return n->barn;
 }
 
 /*
@@ -2152,7 +2160,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 		return 0;
 	}
 
-	kmemleak_not_leak(vec);
+	if (allow_spin)
+		kmemleak_not_leak(vec);
 	return 0;
 }
 
@@ -2161,8 +2170,15 @@ static inline void free_slab_obj_exts(struct slab *slab)
 {
 	struct slabobj_ext *obj_exts;
 
 	obj_exts = slab_obj_exts(slab);
-	if (!obj_exts)
+	if (!obj_exts) {
+		/*
+		 * If obj_exts allocation failed, slab->obj_exts is set to
+		 * OBJEXTS_ALLOC_FAIL. In this case, we end up here and should
+		 * clear the flag.
+		 */
+		slab->obj_exts = 0;
 		return;
+	}
 
 	/*
 	 * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
@@ -4981,6 +4997,10 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 	}
 
 	barn = get_barn(s);
+	if (!barn) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
 
 	full = barn_replace_empty_sheaf(barn, pcs->main);
@@ -5152,13 +5172,20 @@ next_batch:
 	if (unlikely(pcs->main->size == 0)) {
 
 		struct slab_sheaf *full;
+		struct node_barn *barn;
 
 		if (pcs->spare && pcs->spare->size > 0) {
 			swap(pcs->main, pcs->spare);
 			goto do_alloc;
 		}
 
-		full = barn_replace_empty_sheaf(get_barn(s), pcs->main);
+		barn = get_barn(s);
+		if (!barn) {
+			local_unlock(&s->cpu_sheaves->lock);
+			return allocated;
+		}
+
+		full = barn_replace_empty_sheaf(barn, pcs->main);
 
 		if (full) {
 			stat(s, BARN_GET);
@@ -5313,6 +5340,7 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 {
 	struct slub_percpu_sheaves *pcs;
 	struct slab_sheaf *sheaf = NULL;
+	struct node_barn *barn;
 
 	if (unlikely(size > s->sheaf_capacity)) {
@@ -5354,8 +5382,11 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 		pcs->spare = NULL;
 		stat(s, SHEAF_PREFILL_FAST);
 	} else {
+		barn = get_barn(s);
+
 		stat(s, SHEAF_PREFILL_SLOW);
-		sheaf = barn_get_full_or_empty_sheaf(get_barn(s));
+		if (barn)
+			sheaf = barn_get_full_or_empty_sheaf(barn);
 		if (sheaf && sheaf->size)
 			stat(s, BARN_GET);
 		else
@@ -5425,7 +5456,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
 	 * If the barn has too many full sheaves or we fail to refill the sheaf,
 	 * simply flush and free it.
 	 */
-	if (data_race(barn->nr_full) >= MAX_FULL_SHEAVES ||
+	if (!barn || data_race(barn->nr_full) >= MAX_FULL_SHEAVES ||
 	    refill_sheaf(s, sheaf, gfp)) {
 		sheaf_flush_unused(s, sheaf);
 		free_empty_sheaf(s, sheaf);
@@ -5942,10 +5973,9 @@ slab_empty:
  * put the full sheaf there.
  */
 static void __pcs_install_empty_sheaf(struct kmem_cache *s,
-		struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty)
+		struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty,
+		struct node_barn *barn)
 {
-	struct node_barn *barn;
-
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
 	/* This is what we expect to find if nobody interrupted us. */
@@ -5955,8 +5985,6 @@ static void __pcs_install_empty_sheaf(struct kmem_cache *s,
 		return;
 	}
 
-	barn = get_barn(s);
-
 	/*
 	 * Unlikely because if the main sheaf had space, we would have just
 	 * freed to it. Get rid of our empty sheaf.
@@ -6001,6 +6029,11 @@ restart:
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
 	barn = get_barn(s);
+	if (!barn) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	put_fail = false;
 
 	if (!pcs->spare) {
@@ -6083,7 +6116,7 @@ got_empty:
 	}
 
 	pcs = this_cpu_ptr(s->cpu_sheaves);
-	__pcs_install_empty_sheaf(s, pcs, empty);
+	__pcs_install_empty_sheaf(s, pcs, empty, barn);
 
 	return pcs;
 }
@@ -6120,8 +6153,9 @@ bool free_to_pcs(struct kmem_cache *s, void *object)
 
 static void rcu_free_sheaf(struct rcu_head *head)
 {
+	struct kmem_cache_node *n;
 	struct slab_sheaf *sheaf;
-	struct node_barn *barn;
+	struct node_barn *barn = NULL;
 	struct kmem_cache *s;
 
 	sheaf = container_of(head, struct slab_sheaf, rcu_head);
@@ -6138,7 +6172,11 @@ static void rcu_free_sheaf(struct rcu_head *head)
 	 */
 	__rcu_free_sheaf_prepare(s, sheaf);
 
-	barn = get_node(s, sheaf->node)->barn;
+	n = get_node(s, sheaf->node);
+	if (!n)
+		goto flush;
+
+	barn = n->barn;
 
 	/* due to slab_free_hook() */
 	if (unlikely(sheaf->size == 0))
@@ -6156,11 +6194,12 @@ static void rcu_free_sheaf(struct rcu_head *head)
 		return;
 	}
 
+flush:
 	stat(s, BARN_PUT_FAIL);
 	sheaf_flush_unused(s, sheaf);
 
 empty:
-	if (data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) {
+	if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) {
 		barn_put_empty_sheaf(barn, sheaf);
 		return;
 	}
@@ -6190,6 +6229,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 	}
 
 	barn = get_barn(s);
+	if (!barn) {
+		local_unlock(&s->cpu_sheaves->lock);
+		goto fail;
+	}
 
 	empty = barn_get_empty_sheaf(barn);
@@ -6303,6 +6346,8 @@ next_batch:
 		goto do_free;
 
 	barn = get_barn(s);
+	if (!barn)
+		goto no_empty;
 
 	if (!pcs->spare) {
 		empty = barn_get_empty_sheaf(barn);
@@ -6405,15 +6450,16 @@ static void free_deferred_objects(struct irq_work *work)
 		slab = virt_to_slab(x);
 		s = slab->slab_cache;
 
+		/* Point 'x' back to the beginning of allocated object */
+		x -= s->offset;
+
 		/*
 		 * We used freepointer in 'x' to link 'x' into df->objects.
 		 * Clear it to NULL to avoid false positive detection
 		 * of "Freepointer corruption".
 		 */
-		*(void **)x = NULL;
+		set_freepointer(s, x, NULL);
 
-		/* Point 'x' back to the beginning of allocated object */
-		x -= s->offset;
 		__slab_free(s, slab, x, x, 1, _THIS_IP_);
 	}
@@ -6431,17 +6477,24 @@ static void free_deferred_objects(struct irq_work *work)
 
 static void defer_free(struct kmem_cache *s, void *head)
 {
-	struct defer_free *df = this_cpu_ptr(&defer_free_objects);
+	struct defer_free *df;
+
+	guard(preempt)();
+	df = this_cpu_ptr(&defer_free_objects);
 
 	if (llist_add(head + s->offset, &df->objects))
 		irq_work_queue(&df->work);
 }
 
 static void defer_deactivate_slab(struct slab *slab, void *flush_freelist)
 {
-	struct defer_free *df = this_cpu_ptr(&defer_free_objects);
+	struct defer_free *df;
 
 	slab->flush_freelist = flush_freelist;
+
+	guard(preempt)();
+
+	df = this_cpu_ptr(&defer_free_objects);
 	if (llist_add(&slab->llnode, &df->slabs))
 		irq_work_queue(&df->work);
 }
@@ -7693,7 +7746,8 @@ void __kmem_cache_release(struct kmem_cache *s)
 	pcs_destroy(s);
#ifndef CONFIG_SLUB_TINY
#ifdef CONFIG_PREEMPT_RT
-	lockdep_unregister_key(&s->lock_key);
+	if (s->cpu_slab)
+		lockdep_unregister_key(&s->lock_key);
#endif
 	free_percpu(s->cpu_slab);
#endif
diff --git a/mm/util.c b/mm/util.c
index 6c1d64ed0221..8989d5767528 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -566,6 +566,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long pgoff)
 {
+	loff_t off = (loff_t)pgoff << PAGE_SHIFT;
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
 	unsigned long populate;
@@ -573,7 +574,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret)
-		ret = fsnotify_mmap_perm(file, prot, pgoff >> PAGE_SHIFT, len);
+		ret = fsnotify_mmap_perm(file, prot, off, len);
 	if (!ret) {
 		if (mmap_write_lock_killable(mm))
 			return -EINTR;
