diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 194 |
1 files changed, 96 insertions, 98 deletions
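diff --git a/mm/slub.c b/mm/slub.c index f3d61b330a76..30003763d224 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -23,6 +23,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/kasan.h> +#include <linux/node.h> #include <linux/kmsan.h> #include <linux/cpu.h> #include <linux/cpuset.h> @@ -91,14 +92,14 @@ * The partially empty slabs cached on the CPU partial list are used * for performance reasons, which speeds up the allocation process. * These slabs are not frozen, but are also exempt from list management, - * by clearing the PG_workingset flag when moving out of the node + * by clearing the SL_partial flag when moving out of the node * partial list. Please see __slab_free() for more details. * * To sum up, the current scheme is: - * - node partial slab: PG_Workingset && !frozen - * - cpu partial slab: !PG_Workingset && !frozen - * - cpu slab: !PG_Workingset && frozen - * - full slab: !PG_Workingset && !frozen + * - node partial slab: SL_partial && !frozen + * - cpu partial slab: !SL_partial && !frozen + * - cpu slab: !SL_partial && frozen + * - full slab: !SL_partial && !frozen * * list_lock * @@ -183,6 +184,22 @@ * the fast path and disables lockless freelists. */ +/** + * enum slab_flags - How the slab flags bits are used. + * @SL_locked: Is locked with slab_lock() + * @SL_partial: On the per-node partial list + * @SL_pfmemalloc: Was allocated from PF_MEMALLOC reserves + * + * The slab flags share space with the page flags but some bits have + * different interpretations. The high bits are used for information + * like zone/node/section. + */ +enum slab_flags { + SL_locked = PG_locked, + SL_partial = PG_workingset, /* Historical reasons for this bit */ + SL_pfmemalloc = PG_active, /* Historical reasons for this bit */ +}; + /* * We could simply use migrate_disable()/enable() but as long as it's a * function call even on !PREEMPT_RT, use inline preempt_disable() there. @@ -447,7 +464,7 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) /* * Tracks for which NUMA nodes we have kmem_cache_nodes allocated. - * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily + * Corresponds to node_state[N_MEMORY], but can temporarily * differ during memory hotplug/hotremove operations. * Protected by slab_mutex. */ @@ -635,16 +652,35 @@ static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) #endif /* CONFIG_SLUB_CPU_PARTIAL */ /* + * If network-based swap is enabled, slub must keep track of whether memory + * was allocated from pfmemalloc reserves. + */ +static inline bool slab_test_pfmemalloc(const struct slab *slab) +{ + return test_bit(SL_pfmemalloc, &slab->flags); +} + +static inline void slab_set_pfmemalloc(struct slab *slab) +{ + set_bit(SL_pfmemalloc, &slab->flags); +} + +static inline void __slab_clear_pfmemalloc(struct slab *slab) +{ + __clear_bit(SL_pfmemalloc, &slab->flags); +} + +/* * Per slab locking using the pagelock */ static __always_inline void slab_lock(struct slab *slab) { - bit_spin_lock(PG_locked, &slab->__page_flags); + bit_spin_lock(SL_locked, &slab->flags); } static __always_inline void slab_unlock(struct slab *slab) { - bit_spin_unlock(PG_locked, &slab->__page_flags); + bit_spin_unlock(SL_locked, &slab->flags); } static inline bool @@ -1010,7 +1046,7 @@ static void print_slab_info(const struct slab *slab) { pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", slab, slab->objects, slab->inuse, slab->freelist, - &slab->__page_flags); + &slab->flags); } void skip_orig_size_check(struct kmem_cache *s, const void *object) @@ -1973,6 +2009,11 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ __GFP_ACCOUNT | __GFP_NOFAIL) +static inline void init_slab_obj_exts(struct slab *slab) +{ + slab->obj_exts = 0; +} + int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t 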
@@ -447,7 +464,7 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) /* * Tracks for which NUMA nodes we have kmem_cache_nodes allocated. - * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily + * Corresponds to node_state[N_MEMORY], but can temporarily * differ during memory hotplug/hotremove operations. * Protected by slab_mutex. */ @@ -635,16 +652,35 @@ static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s) #endif /* CONFIG_SLUB_CPU_PARTIAL */ /* + * If network-based swap is enabled, slub must keep track of whether memory + * were allocated from pfmemalloc reserves. + */ +static inline bool slab_test_pfmemalloc(const struct slab *slab) +{ + return test_bit(SL_pfmemalloc, &slab->flags); +} + +static inline void slab_set_pfmemalloc(struct slab *slab) +{ + set_bit(SL_pfmemalloc, &slab->flags); +} + +static inline void __slab_clear_pfmemalloc(struct slab *slab) +{ + __clear_bit(SL_pfmemalloc, &slab->flags); +} + +/* * Per slab locking using the pagelock */ static __always_inline void slab_lock(struct slab *slab) { - bit_spin_lock(PG_locked, &slab->__page_flags); + bit_spin_lock(SL_locked, &slab->flags); } static __always_inline void slab_unlock(struct slab *slab) { - bit_spin_unlock(PG_locked, &slab->__page_flags); + bit_spin_unlock(SL_locked, &slab->flags); } static inline bool @@ -1010,7 +1046,7 @@ static void print_slab_info(const struct slab *slab) { pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", slab, slab->objects, slab->inuse, slab->freelist, - &slab->__page_flags); + &slab->flags); } void skip_orig_size_check(struct kmem_cache *s, const void *object) @@ -1973,6 +2009,11 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ __GFP_ACCOUNT | __GFP_NOFAIL) +static inline void init_slab_obj_exts(struct slab *slab) +{ + slab->obj_exts = 0; +} + int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t 
gfp, bool new_slab) { @@ -2023,8 +2064,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static noinline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2044,20 +2084,12 @@ static noinline void free_slab_obj_exts(struct slab *slab) slab->obj_exts = 0; } -static inline bool need_slab_obj_ext(void) -{ - if (mem_alloc_profiling_enabled()) - return true; +#else /* CONFIG_SLAB_OBJ_EXT */ - /* - * CONFIG_MEMCG creates vector of obj_cgroup objects conditionally - * inside memcg_slab_post_alloc_hook. No other users for now. - */ - return false; +static inline void init_slab_obj_exts(struct slab *slab) +{ } -#else /* CONFIG_SLAB_OBJ_EXT */ - static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2068,11 +2100,6 @@ static inline void free_slab_obj_exts(struct slab *slab) { } -static inline bool need_slab_obj_ext(void) -{ - return false; -} - #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -2093,10 +2120,11 @@ prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p) slab = virt_to_slab(p); if (!slab_obj_exts(slab) && - WARN(alloc_slab_obj_exts(slab, s, flags, false), - "%s, %s: Failed to create slab extension vector!\n", - __func__, s->name)) + alloc_slab_obj_exts(slab, s, flags, false)) { + pr_warn_once("%s, %s: Failed to create slab extension vector!\n", + __func__, s->name); return NULL; + } return slab_obj_exts(slab) + obj_to_index(s, slab, p); } @@ -2120,7 +2148,7 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) static inline void alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) + if (mem_alloc_profiling_enabled()) __alloc_tagging_slab_alloc_hook(s, object, flags); } @@ -2592,8 +2620,12 @@ static __always_inline 
void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online() || need_slab_obj_ext()) - free_slab_obj_exts(slab); + /* + * The slab object extensions should now be freed regardless of + * whether mem_alloc_profiling_enabled() or not because profiling + * might have been disabled after slab->obj_exts got allocated. + */ + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); @@ -2637,6 +2669,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->objects = oo_objects(oo); slab->inuse = 0; slab->frozen = 0; + init_slab_obj_exts(slab); account_slab(slab, oo_order(oo), s, flags); @@ -2720,23 +2753,19 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab) free_slab(s, slab); } -/* - * SLUB reuses PG_workingset bit to keep track of whether it's on - * the per-node partial list. - */ static inline bool slab_test_node_partial(const struct slab *slab) { - return folio_test_workingset(slab_folio(slab)); + return test_bit(SL_partial, &slab->flags); } static inline void slab_set_node_partial(struct slab *slab) { - set_bit(PG_workingset, folio_flags(slab_folio(slab), 0)); + set_bit(SL_partial, &slab->flags); } static inline void slab_clear_node_partial(struct slab *slab) { - clear_bit(PG_workingset, folio_flags(slab_folio(slab), 0)); + clear_bit(SL_partial, &slab->flags); } /* @@ -4272,7 +4301,12 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) flags = kmalloc_fix_flags(flags); flags |= __GFP_COMP; - folio = (struct folio *)alloc_pages_node_noprof(node, flags, order); + + if (node == NUMA_NO_NODE) + folio = (struct folio *)alloc_frozen_pages_noprof(flags, order); + else + folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL); + if (folio) { ptr = folio_address(folio); lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, 
@@ -4768,7 +4802,7 @@ static void free_large_kmalloc(struct folio *folio, void *object) lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); __folio_clear_large_kmalloc(folio); - folio_put(folio); + free_frozen_pages(&folio->page, order); } /* @@ -4933,12 +4967,12 @@ alloc_new: * When slub_debug_orig_size() is off, krealloc() only knows about the bucket * size of an allocation (but not the exact size it was allocated with) and * hence implements the following semantics for shrinking and growing buffers - * with __GFP_ZERO. + * with __GFP_ZERO:: * - * new bucket - * 0 size size - * |--------|----------------| - * | keep | zero | + * new bucket + * 0 size size + * |--------|----------------| + * | keep | zero | * * Otherwise, the original allocation size 'orig_size' could be used to * precisely clear the requested size, and the new size will also be stored @@ -4972,14 +5006,16 @@ static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size) * We want to attempt a large physically contiguous block first because * it is less likely to fragment multiple larger blocks and therefore * contribute to a long term fragmentation less than vmalloc fallback. - * However make sure that larger requests are not too disruptive - no - * OOM killer and no allocation failure warnings as we have a fallback. + * However make sure that larger requests are not too disruptive - i.e. + * do not direct reclaim unless physically contiguous memory is preferred + * (__GFP_RETRY_MAYFAIL mode). 
We still kick in kswapd/kcompactd to + * start working in the background */ if (size > PAGE_SIZE) { flags |= __GFP_NOWARN; if (!(flags & __GFP_RETRY_MAYFAIL)) - flags |= __GFP_NORETRY; + flags &= ~__GFP_DIRECT_RECLAIM; /* nofail semantic is implemented by the vmalloc fallback */ flags &= ~__GFP_NOFAIL; @@ -6150,7 +6186,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) return __kmem_cache_do_shrink(s); } -static int slab_mem_going_offline_callback(void *arg) +static int slab_mem_going_offline_callback(void) { struct kmem_cache *s; @@ -6164,46 +6200,13 @@ static int slab_mem_going_offline_callback(void *arg) return 0; } -static void slab_mem_offline_callback(void *arg) -{ - struct memory_notify *marg = arg; - int offline_node; - - offline_node = marg->status_change_nid_normal; - - /* - * If the node still has available memory. we need kmem_cache_node - * for it yet. - */ - if (offline_node < 0) - return; - - mutex_lock(&slab_mutex); - node_clear(offline_node, slab_nodes); - /* - * We no longer free kmem_cache_node structures here, as it would be - * racy with all get_node() users, and infeasible to protect them with - * slab_mutex. - */ - mutex_unlock(&slab_mutex); -} - -static int slab_mem_going_online_callback(void *arg) +static int slab_mem_going_online_callback(int nid) { struct kmem_cache_node *n; struct kmem_cache *s; - struct memory_notify *marg = arg; - int nid = marg->status_change_nid_normal; int ret = 0; /* - * If the node's memory is already available, then kmem_cache_node is - * already created. Nothing to do. - */ - if (nid < 0) - return 0; - - /* * We are bringing a node online. No memory is available yet. We must * allocate a kmem_cache_node structure in order to bring the node * online. 
@@ -6242,21 +6245,16 @@ out: static int slab_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { + struct node_notify *nn = arg; + int nid = nn->nid; int ret = 0; switch (action) { - case MEM_GOING_ONLINE: - ret = slab_mem_going_online_callback(arg); - break; - case MEM_GOING_OFFLINE: - ret = slab_mem_going_offline_callback(arg); - break; - case MEM_OFFLINE: - case MEM_CANCEL_ONLINE: - slab_mem_offline_callback(arg); + case NODE_ADDING_FIRST_MEMORY: + ret = slab_mem_going_online_callback(nid); break; - case MEM_ONLINE: - case MEM_CANCEL_OFFLINE: + case NODE_REMOVING_LAST_MEMORY: + ret = slab_mem_going_offline_callback(); break; } if (ret) @@ -6324,14 +6322,14 @@ void __init kmem_cache_init(void) * Initialize the nodemask for which we will allocate per node * structures. Here we don't need taking slab_mutex yet. */ - for_each_node_state(node, N_NORMAL_MEMORY) + for_each_node_state(node, N_MEMORY) node_set(node, slab_nodes); create_boot_cache(kmem_cache_node, "kmem_cache_node", sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0); - hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); + hotplug_node_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); /* Able to allocate the per node structures */ slab_state = PARTIAL; |