From 346907ceb9d11b9e22677c142b45ff50dd20a66a Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 16 Nov 2022 15:56:32 +0100 Subject: mm, slab: ignore hardened usercopy parameters when disabled With CONFIG_HARDENED_USERCOPY not enabled, there are no __check_heap_object() checks happening that would use the struct kmem_cache useroffset and usersize fields. Yet the fields are still initialized, preventing merging of otherwise compatible caches. Also the fields contribute to struct kmem_cache size unnecessarily when unused. Thus #ifdef them out completely when CONFIG_HARDENED_USERCOPY is disabled. In kmem_dump_obj() print object_size instead of usersize, as that's actually the intention. In a quick virtme boot test, this has reduced the number of caches in /proc/slabinfo from 131 to 111. Cc: Kees Cook Signed-off-by: Vlastimil Babka Acked-by: Roman Gushchin Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- include/linux/slab_def.h | 2 ++ include/linux/slub_def.h | 2 ++ mm/slab.h | 2 -- mm/slab_common.c | 13 ++++++++++--- mm/slub.c | 4 ++++ 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index f0ffad6a3365..5834bad8ad78 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -80,8 +80,10 @@ struct kmem_cache { unsigned int *random_seq; #endif +#ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f9c68a9dac04..7ed5e455cbf4 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -136,8 +136,10 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif +#ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/mm/slab.h b/mm/slab.h index 0202a8c2f0d2..db9a7984e22e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -207,8 +207,6 @@ struct kmem_cache { unsigned int size; /* The aligned/padded/added on size */ unsigned int align; /* Alignment as calculated */ slab_flags_t flags; /* Active flags on the slab */ - unsigned int useroffset;/* Usercopy region offset */ - unsigned int usersize; /* Usercopy region size */ const char *name; /* Slab name for sysfs */ int refcount; /* Use counter */ void (*ctor)(void *); /* Called on object slot creation */ diff --git a/mm/slab_common.c b/mm/slab_common.c index 0042fb2730d1..af0f370fe7a2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -143,8 +143,10 @@ int slab_unmergeable(struct kmem_cache *s) if (s->ctor) return 1; +#ifdef CONFIG_HARDENED_USERCOPY if (s->usersize) return 1; +#endif /* * We may have set a slab to be unmergeable during bootstrap. @@ -223,8 +225,10 @@ static struct kmem_cache *create_cache(const char *name, s->size = s->object_size = object_size; s->align = align; s->ctor = ctor; +#ifdef CONFIG_HARDENED_USERCOPY s->useroffset = useroffset; s->usersize = usersize; +#endif err = __kmem_cache_create(s, flags); if (err) @@ -317,7 +321,8 @@ kmem_cache_create_usercopy(const char *name, flags &= CACHE_CREATE_MASK; /* Fail closed on bad usersize of useroffset values. */ - if (WARN_ON(!usersize && useroffset) || + if (!IS_ENABLED(CONFIG_HARDENED_USERCOPY) || + WARN_ON(!usersize && useroffset) || WARN_ON(size < usersize || size - usersize < useroffset)) usersize = useroffset = 0; @@ -595,8 +600,8 @@ void kmem_dump_obj(void *object) ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset; pr_cont(" pointer offset %lu", ptroffset); } - if (kp.kp_slab_cache && kp.kp_slab_cache->usersize) - pr_cont(" size %u", kp.kp_slab_cache->usersize); + if (kp.kp_slab_cache && kp.kp_slab_cache->object_size) + pr_cont(" size %u", kp.kp_slab_cache->object_size); if (kp.kp_ret) pr_cont(" allocated at %pS\n", kp.kp_ret); else @@ -640,8 +645,10 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, align = max(align, size); s->align = calculate_alignment(flags, align, size); +#ifdef CONFIG_HARDENED_USERCOPY s->useroffset = useroffset; s->usersize = usersize; +#endif err = __kmem_cache_create(s, flags); diff --git a/mm/slub.c b/mm/slub.c index 157527d7101b..e32db8540767 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5502,11 +5502,13 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) SLAB_ATTR_RO(cache_dma); #endif +#ifdef CONFIG_HARDENED_USERCOPY static ssize_t usersize_show(struct kmem_cache *s, char *buf) { return sysfs_emit(buf, "%u\n", s->usersize); } SLAB_ATTR_RO(usersize); +#endif static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { @@ -5803,7 +5805,9 @@ static struct attribute *slab_attrs[] = { #ifdef CONFIG_FAILSLAB &failslab_attr.attr, #endif +#ifdef CONFIG_HARDENED_USERCOPY &usersize_attr.attr, +#endif #ifdef CONFIG_KFENCE &skip_kfence_attr.attr, #endif -- cgit From e240e53ae0abb0896e0f399bdfef41c69cec3123 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Nov 2022 18:13:45 +0100 Subject: mm, slub: add CONFIG_SLUB_TINY For tiny systems that have used SLOB until now, SLUB might be impractical due to its higher memory usage. To help with that, introduce an option CONFIG_SLUB_TINY that modifies SLUB to use less memory. This is done by sacrificing scalability, security and debugging features, therefore not recommended for any system with more than 16MB RAM. This commit introduces the option and uses it to set other related options in a way that reduces memory usage. Signed-off-by: Vlastimil Babka Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- lib/Kconfig.kasan | 2 +- mm/Kconfig | 21 +++++++++++++++++---- mm/Kconfig.debug | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index ca09b1cf8ee9..836f70393e22 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -37,7 +37,7 @@ menuconfig KASAN (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \ CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ HAVE_ARCH_KASAN_HW_TAGS - depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) + depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB) select STACKDEPOT_ALWAYS_INIT help Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety diff --git a/mm/Kconfig b/mm/Kconfig index 57e1d8c5b505..6701d72d3037 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -230,6 +230,19 @@ config SLOB endchoice +config SLUB_TINY + bool "Configure SLUB for minimal memory footprint" + depends on SLUB && EXPERT + select SLAB_MERGE_DEFAULT + help + Configures the SLUB allocator in a way to achieve minimal memory + footprint, sacrificing scalability, debugging and other features. + This is intended only for the smallest system that had used the + SLOB allocator and is not recommended for systems with more than + 16MB RAM. + + If unsure, say N. + config SLAB_MERGE_DEFAULT bool "Allow slab caches to be merged" default y @@ -247,7 +260,7 @@ config SLAB_MERGE_DEFAULT config SLAB_FREELIST_RANDOM bool "Randomize slab freelist" - depends on SLAB || SLUB + depends on SLAB || (SLUB && !SLUB_TINY) help Randomizes the freelist order used on creating new pages. This security feature reduces the predictability of the kernel slab @@ -255,7 +268,7 @@ config SLAB_FREELIST_RANDOM config SLAB_FREELIST_HARDENED bool "Harden slab freelist metadata" - depends on SLAB || SLUB + depends on SLAB || (SLUB && !SLUB_TINY) help Many kernel heap attacks try to target slab cache metadata and other infrastructure. This options makes minor performance @@ -267,7 +280,7 @@ config SLAB_FREELIST_HARDENED config SLUB_STATS default n bool "Enable SLUB performance statistics" - depends on SLUB && SYSFS + depends on SLUB && SYSFS && !SLUB_TINY help SLUB statistics are useful to debug SLUBs allocation behavior in order find ways to optimize the allocator. This should never be @@ -279,7 +292,7 @@ config SLUB_STATS config SLUB_CPU_PARTIAL default y - depends on SLUB && SMP + depends on SLUB && SMP && !SLUB_TINY bool "SLUB per cpu partial cache" help Per cpu partial caches accelerate objects allocation and freeing diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index ce8dded36de9..fca699ad1fb0 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -56,7 +56,7 @@ config DEBUG_SLAB config SLUB_DEBUG default y bool "Enable SLUB debugging support" if EXPERT - depends on SLUB && SYSFS + depends on SLUB && SYSFS && !SLUB_TINY select STACKDEPOT if STACKTRACE_SUPPORT help SLUB has extensive debug support features. Disabling these can -- cgit From b1a413a39a1a7694acf3636a52c109821148ecdd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Nov 2022 18:18:39 +0100 Subject: mm, slub: disable SYSFS support with CONFIG_SLUB_TINY Currently SLUB enables its sysfs support depending unconditionally on the general CONFIG_SYSFS setting. To reduce the configuration combination space, make CONFIG_SLUB_TINY disable SLUB's sysfs support by reusing the existing SLAB_SUPPORTS_SYSFS define. It is unlikely that real tiny systems would combine CONFIG_SLUB_TINY with CONFIG_SYSFS, but a randconfig might. Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- include/linux/slub_def.h | 2 +- mm/slub.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 7ed5e455cbf4..95b67863d5cf 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -144,7 +144,7 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -#ifdef CONFIG_SYSFS +#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) #define SLAB_SUPPORTS_SYSFS void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); diff --git a/mm/slub.c b/mm/slub.c index e32db8540767..b81ceeb6e6de 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -298,7 +298,7 @@ struct track { enum track_item { TRACK_ALLOC, TRACK_FREE }; -#ifdef CONFIG_SYSFS +#ifdef SLAB_SUPPORTS_SYSFS static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); #else @@ -2935,7 +2935,7 @@ out: } #endif /* CONFIG_SLUB_DEBUG */ -#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) +#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS) static unsigned long count_partial(struct kmem_cache_node *n, int (*get_count)(struct slab *)) { @@ -2949,7 +2949,7 @@ static unsigned long count_partial(struct kmem_cache_node *n, spin_unlock_irqrestore(&n->list_lock, flags); return x; } -#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ +#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */ static noinline void slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) @@ -4924,7 +4924,7 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) return 0; } -#ifdef CONFIG_SYSFS +#ifdef SLAB_SUPPORTS_SYSFS static int count_inuse(struct slab *slab) { return slab->inuse; @@ -5182,7 +5182,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s, #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_SLUB_DEBUG */ -#ifdef CONFIG_SYSFS +#ifdef SLAB_SUPPORTS_SYSFS enum slab_stat_type { SL_ALL, /* All slabs */ SL_PARTIAL, /* Only partially allocated slabs */ @@ -6060,7 +6060,7 @@ static int __init slab_sysfs_init(void) } __initcall(slab_sysfs_init); -#endif /* CONFIG_SYSFS */ +#endif /* SLAB_SUPPORTS_SYSFS */ #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) static int slab_debugfs_show(struct seq_file *seq, void *v) -- cgit From 5a8a3c1f73c6488d1a2c18ac1f5308b1fd2aa5f0 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Nov 2022 10:50:28 +0100 Subject: mm, slub: retain no free slabs on partial list with CONFIG_SLUB_TINY SLUB will leave a number of slabs on the partial list even if they are empty, to avoid some slab freeing and reallocation. The goal of CONFIG_SLUB_TINY is to minimize memory overhead, so set the limits to 0 for immediate slab page freeing. Signed-off-by: Vlastimil Babka Acked-by: Roman Gushchin Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- mm/slub.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index b81ceeb6e6de..19b6cf74bdfc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -241,6 +241,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) /* Enable to log cmpxchg failures */ #undef SLUB_DEBUG_CMPXCHG +#ifndef CONFIG_SLUB_TINY /* * Minimum number of partial slabs. These will be left on the partial * lists even if they are empty. kmem_cache_shrink may reclaim them. @@ -253,6 +254,10 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) * sort the partial list by the number of objects in use. */ #define MAX_PARTIAL 10 +#else +#define MIN_PARTIAL 0 +#define MAX_PARTIAL 0 +#endif #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_STORE_USER) -- cgit From 90ce872c22b248724d1c87232410e3b38536e107 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 21 Nov 2022 11:44:54 +0100 Subject: mm, slub: lower the default slub_max_order with CONFIG_SLUB_TINY With CONFIG_SLUB_TINY we want to minimize memory overhead. By lowering the default slub_max_order we can make slab allocations use smaller pages. However depending on object sizes, order-0 might not be the best due to increased fragmentation. When testing on a 8MB RAM k210 system by Damien Le Moal [1], slub_max_order=1 had the best results, so use that as the default for CONFIG_SLUB_TINY. [1] https://lore.kernel.org/all/6a1883c4-4c3f-545a-90e8-2cd805bcf4ae@opensource.wdc.com/ Signed-off-by: Vlastimil Babka Acked-by: Roman Gushchin Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- mm/slub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 19b6cf74bdfc..f4c7f4e3751f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3888,7 +3888,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); * take the list_lock. */ static unsigned int slub_min_order; -static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; +static unsigned int slub_max_order = + IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER; static unsigned int slub_min_objects; /* -- cgit From 2f7c1c1396b587e8cfe18a1f0d628cedaae56b6a Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Nov 2022 18:19:28 +0100 Subject: mm, slub: don't create kmalloc-rcl caches with CONFIG_SLUB_TINY Distinguishing kmalloc(__GFP_RECLAIMABLE) can help against fragmentation by grouping pages by mobility, but on tiny systems the extra memory overhead of separate set of kmalloc-rcl caches will probably be worse, and mobility grouping likely disabled anyway. Thus with CONFIG_SLUB_TINY, don't create kmalloc-rcl caches and use the regular ones. Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- include/linux/slab.h | 9 +++++++-- mm/slab_common.c | 10 ++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 45efc6c553b8..ae2d19ec8467 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -336,12 +336,17 @@ enum kmalloc_cache_type { #endif #ifndef CONFIG_MEMCG_KMEM KMALLOC_CGROUP = KMALLOC_NORMAL, -#else - KMALLOC_CGROUP, #endif +#ifdef CONFIG_SLUB_TINY + KMALLOC_RECLAIM = KMALLOC_NORMAL, +#else KMALLOC_RECLAIM, +#endif #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, +#endif +#ifdef CONFIG_MEMCG_KMEM + KMALLOC_CGROUP, #endif NR_KMALLOC_TYPES }; diff --git a/mm/slab_common.c b/mm/slab_common.c index af0f370fe7a2..012fc75d3ffa 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -773,10 +773,16 @@ EXPORT_SYMBOL(kmalloc_size_roundup); #define KMALLOC_CGROUP_NAME(sz) #endif +#ifndef CONFIG_SLUB_TINY +#define KMALLOC_RCL_NAME(sz) .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz, +#else +#define KMALLOC_RCL_NAME(sz) +#endif + #define INIT_KMALLOC_INFO(__size, __short_size) \ { \ .name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \ - .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \ + KMALLOC_RCL_NAME(__short_size) \ KMALLOC_CGROUP_NAME(__short_size) \ KMALLOC_DMA_NAME(__short_size) \ .size = __size, \ @@ -862,7 +868,7 @@ void __init setup_kmalloc_cache_index_table(void) static void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags) { - if (type == KMALLOC_RECLAIM) { + if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) { flags |= SLAB_RECLAIM_ACCOUNT; } else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) { if (mem_cgroup_kmem_disabled()) { -- cgit From 3d97d976e5d58554570003ca5297a67142ae4e29 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 16 Nov 2022 16:48:23 +0100 Subject: mm, slab: ignore SLAB_RECLAIM_ACCOUNT with CONFIG_SLUB_TINY SLAB_RECLAIM_ACCOUNT caches allocate their slab pages with __GFP_RECLAIMABLE and can help against fragmentation by grouping pages by mobility, but on tiny systems mobility grouping is likely disabled anyway and ignoring SLAB_RECLAIM_ACCOUNT might instead lead to merging of caches that are made incompatible just by the flag. Thus with CONFIG_SLUB_TINY, make SLAB_RECLAIM_ACCOUNT ineffective. Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- include/linux/slab.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/slab.h b/include/linux/slab.h index ae2d19ec8467..37fa41af24b6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -129,7 +129,11 @@ /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ +#ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#else +#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* -- cgit From fa9b88e459d710cadf3b01e8a64eda00cc91cdd6 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 21 Nov 2022 16:06:38 +0100 Subject: mm, slub: refactor free debug processing Since commit c7323a5ad078 ("mm/slub: restrict sysfs validation to debug caches and make it safe"), caches with debugging enabled use the free_debug_processing() function to do both freeing checks and actual freeing to partial list under list_lock, bypassing the fast paths. We will want to use the same path for CONFIG_SLUB_TINY, but without the debugging checks, so refactor the code so that free_debug_processing() does only the checks, while the freeing is handled by a new function free_to_partial_list(). For consistency, change return parameter alloc_debug_processing() from int to bool and correct the !SLUB_DEBUG variant to return true and not false. This didn't matter until now, but will in the following changes. Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> --- mm/slub.c | 154 +++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 83 insertions(+), 71 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index f4c7f4e3751f..d814936649b1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1368,7 +1368,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s, return 1; } -static noinline int alloc_debug_processing(struct kmem_cache *s, +static noinline bool alloc_debug_processing(struct kmem_cache *s, struct slab *slab, void *object, int orig_size) { if (s->flags & SLAB_CONSISTENCY_CHECKS) { @@ -1380,7 +1380,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, trace(s, slab, object, 1); set_orig_size(s, object, orig_size); init_object(s, object, SLUB_RED_ACTIVE); - return 1; + return true; bad: if (folio_test_slab(slab_folio(slab))) { @@ -1393,7 +1393,7 @@ bad: slab->inuse = slab->objects; slab->freelist = NULL; } - return 0; + return false; } static inline int free_consistency_checks(struct kmem_cache *s, @@ -1646,17 +1646,17 @@ static inline void setup_object_debug(struct kmem_cache *s, void *object) {} static inline void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} -static inline int alloc_debug_processing(struct kmem_cache *s, - struct slab *slab, void *object, int orig_size) { return 0; } +static inline bool alloc_debug_processing(struct kmem_cache *s, + struct slab *slab, void *object, int orig_size) { return true; } -static inline void free_debug_processing( - struct kmem_cache *s, struct slab *slab, - void *head, void *tail, int bulk_cnt, - unsigned long addr) {} +static inline bool free_debug_processing(struct kmem_cache *s, + struct slab *slab, void *head, void *tail, int *bulk_cnt, + unsigned long addr, depot_stack_handle_t handle) { return true; } static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {} static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } +static inline depot_stack_handle_t set_track_prepare(void) { return 0; } static inline void set_track(struct kmem_cache *s, void *object, enum track_item alloc, unsigned long addr) {} static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, @@ -2833,38 +2833,28 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n) } /* Supports checking bulk free of a constructed freelist */ -static noinline void free_debug_processing( - struct kmem_cache *s, struct slab *slab, - void *head, void *tail, int bulk_cnt, - unsigned long addr) +static inline bool free_debug_processing(struct kmem_cache *s, + struct slab *slab, void *head, void *tail, int *bulk_cnt, + unsigned long addr, depot_stack_handle_t handle) { - struct kmem_cache_node *n = get_node(s, slab_nid(slab)); - struct slab *slab_free = NULL; + bool checks_ok = false; void *object = head; int cnt = 0; - unsigned long flags; - bool checks_ok = false; - depot_stack_handle_t handle = 0; - - if (s->flags & SLAB_STORE_USER) - handle = set_track_prepare(); - - spin_lock_irqsave(&n->list_lock, flags); if (s->flags & SLAB_CONSISTENCY_CHECKS) { if (!check_slab(s, slab)) goto out; } - if (slab->inuse < bulk_cnt) { + if (slab->inuse < *bulk_cnt) { slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n", - slab->inuse, bulk_cnt); + slab->inuse, *bulk_cnt); goto out; } next_object: - if (++cnt > bulk_cnt) + if (++cnt > *bulk_cnt) goto out_cnt; if (s->flags & SLAB_CONSISTENCY_CHECKS) { @@ -2886,57 +2876,18 @@ next_object: checks_ok = true; out_cnt: - if (cnt != bulk_cnt) + if (cnt != *bulk_cnt) { slab_err(s, slab, "Bulk free expected %d objects but found %d\n", - bulk_cnt, cnt); - -out: - if (checks_ok) { - void *prior = slab->freelist; - - /* Perform the actual freeing while we still hold the locks */ - slab->inuse -= cnt; - set_freepointer(s, tail, prior); - slab->freelist = head; - - /* - * If the slab is empty, and node's partial list is full, - * it should be discarded anyway no matter it's on full or - * partial list. - */ - if (slab->inuse == 0 && n->nr_partial >= s->min_partial) - slab_free = slab; - - if (!prior) { - /* was on full list */ - remove_full(s, n, slab); - if (!slab_free) { - add_partial(n, slab, DEACTIVATE_TO_TAIL); - stat(s, FREE_ADD_PARTIAL); - } - } else if (slab_free) { - remove_partial(n, slab); - stat(s, FREE_REMOVE_PARTIAL); - } + *bulk_cnt, cnt); + *bulk_cnt = cnt; } - if (slab_free) { - /* - * Update the counters while still holding n->list_lock to - * prevent spurious validation warnings - */ - dec_slabs_node(s, slab_nid(slab_free), slab_free->objects); - } - - spin_unlock_irqrestore(&n->list_lock, flags); +out: if (!checks_ok) slab_fix(s, "Object at 0x%p not freed", object); - if (slab_free) { - stat(s, FREE_SLAB); - free_slab(s, slab_free); - } + return checks_ok; } #endif /* CONFIG_SLUB_DEBUG */ @@ -3453,6 +3404,67 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) } EXPORT_SYMBOL(kmem_cache_alloc_node); +static noinline void free_to_partial_list( + struct kmem_cache *s, struct slab *slab, + void *head, void *tail, int bulk_cnt, + unsigned long addr) +{ + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); + struct slab *slab_free = NULL; + int cnt = bulk_cnt; + unsigned long flags; + depot_stack_handle_t handle = 0; + + if (s->flags & SLAB_STORE_USER) + handle = set_track_prepare(); + + spin_lock_irqsave(&n->list_lock, flags); + + if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) { + void *prior = slab->freelist; + + /* Perform the actual freeing while we still hold the locks */ + slab->inuse -= cnt; + set_freepointer(s, tail, prior); + slab->freelist = head; + + /* + * If the slab is empty, and node's partial list is full, + * it should be discarded anyway no matter it's on full or + * partial list. + */ + if (slab->inuse == 0 && n->nr_partial >= s->min_partial) + slab_free = slab; + + if (!prior) { + /* was on full list */ + remove_full(s, n, slab); + if (!slab_free) { + add_partial(n, slab, DEACTIVATE_TO_TAIL); + stat(s, FREE_ADD_PARTIAL); + } + } else if (slab_free) { + remove_partial(n, slab); + stat(s, FREE_REMOVE_PARTIAL); + } + } + + if (slab_free) { + /* + * Update the counters while still holding n->list_lock to + * prevent spurious validation warnings + */ + dec_slabs_node(s, slab_nid(slab_free), slab_free->objects); + } + + spin_unlock_irqrestore(&n->list_lock, flags); + + if (slab_free) { + stat(s, FREE_SLAB); + free_slab(s, slab_free); + } +} + /* * Slow path handling. This may still be called frequently since objects * have a longer lifetime than the cpu slabs in most processing loads. @@ -3479,7 +3491,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, return; if (kmem_cache_debug(s)) { - free_debug_processing(s, slab, head, tail, cnt, addr); + free_to_partial_list(s, slab, head, tail, cnt, addr); return; } -- cgit From 56d5a2b9ba85a390473e86b4fe4697560242a248 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 21 Nov 2022 16:23:50 +0100 Subject: mm, slub: split out allocations from pre/post hooks In the following patch we want to introduce CONFIG_SLUB_TINY allocation paths that don't use the percpu slab. To prepare, refactor the allocation functions: Split out __slab_alloc_node() from slab_alloc_node() where the former does the actual allocation and the latter calls the pre/post hooks. Analogically, split out __kmem_cache_alloc_bulk() from kmem_cache_alloc_bulk(). Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> --- mm/slub.c | 130 ++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 50 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d814936649b1..88f0ce49caab 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2907,10 +2907,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, } #endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */ +#ifdef CONFIG_SLUB_DEBUG static noinline void slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { -#ifdef CONFIG_SLUB_DEBUG static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); int node; @@ -2941,8 +2941,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n", node, nr_slabs, nr_objs, nr_free); } -#endif } +#else /* CONFIG_SLUB_DEBUG */ +static inline void +slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { } +#endif static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) { @@ -3239,45 +3242,13 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return p; } -/* - * If the object has been wiped upon free, make sure it's fully initialized by - * zeroing out freelist pointer. - */ -static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, - void *obj) -{ - if (unlikely(slab_want_init_on_free(s)) && obj) - memset((void *)((char *)kasan_reset_tag(obj) + s->offset), - 0, sizeof(void *)); -} - -/* - * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) - * have the fastpath folded into their functions. So no function call - * overhead for requests that can be satisfied on the fastpath. - * - * The fastpath works by first checking if the lockless freelist can be used. - * If not then __slab_alloc is called for slow processing. - * - * Otherwise we can simply pick the next object from the lockless free list. - */ -static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru, +static __always_inline void *__slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) { - void *object; struct kmem_cache_cpu *c; struct slab *slab; unsigned long tid; - struct obj_cgroup *objcg = NULL; - bool init = false; - - s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); - if (!s) - return NULL; - - object = kfence_alloc(s, orig_size, gfpflags); - if (unlikely(object)) - goto out; + void *object; redo: /* @@ -3347,6 +3318,48 @@ redo: stat(s, ALLOC_FASTPATH); } + return object; +} + +/* + * If the object has been wiped upon free, make sure it's fully initialized by + * zeroing out freelist pointer. + */ +static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, + void *obj) +{ + if (unlikely(slab_want_init_on_free(s)) && obj) + memset((void *)((char *)kasan_reset_tag(obj) + s->offset), + 0, sizeof(void *)); +} + +/* + * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) + * have the fastpath folded into their functions. So no function call + * overhead for requests that can be satisfied on the fastpath. + * + * The fastpath works by first checking if the lockless freelist can be used. + * If not then __slab_alloc is called for slow processing. + * + * Otherwise we can simply pick the next object from the lockless free list. + */ +static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) +{ + void *object; + struct obj_cgroup *objcg = NULL; + bool init = false; + + s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); + if (!s) + return NULL; + + object = kfence_alloc(s, orig_size, gfpflags); + if (unlikely(object)) + goto out; + + object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); + maybe_wipe_obj_freeptr(s, object); init = slab_want_init_on_alloc(gfpflags, s); @@ -3799,18 +3812,12 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -/* Note that interrupts must be enabled when calling this function. */ -int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p, struct obj_cgroup *objcg) { struct kmem_cache_cpu *c; int i; - struct obj_cgroup *objcg = NULL; - /* memcg and kmem_cache debug support */ - s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); - if (unlikely(!s)) - return false; /* * Drain objects in the per cpu slab, while disabling local * IRQs, which protects against PREEMPT and interrupts @@ -3864,18 +3871,41 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_unlock_irq(&s->cpu_slab->lock); slub_put_cpu_ptr(s->cpu_slab); - /* - * memcg and kmem_cache debug support and memory initialization. - * Done outside of the IRQ disabled fastpath loop. - */ - slab_post_alloc_hook(s, objcg, flags, size, p, - slab_want_init_on_alloc(flags, s)); return i; + error: slub_put_cpu_ptr(s->cpu_slab); slab_post_alloc_hook(s, objcg, flags, i, p, false); kmem_cache_free_bulk(s, i, p); return 0; + +} + +/* Note that interrupts must be enabled when calling this function. */ +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) +{ + int i; + struct obj_cgroup *objcg = NULL; + + if (!size) + return 0; + + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags); + if (unlikely(!s)) + return 0; + + i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg); + + /* + * memcg and kmem_cache debug support and memory initialization. + * Done outside of the IRQ disabled fastpath loop. + */ + if (i != 0) + slab_post_alloc_hook(s, objcg, flags, size, p, + slab_want_init_on_alloc(flags, s)); + return i; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); -- cgit From 0af8489b0216fa1dd83e264bef8063f2632633d7 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 15 Nov 2022 18:14:31 +0100 Subject: mm, slub: remove percpu slabs with CONFIG_SLUB_TINY SLUB gets most of its scalability by percpu slabs. However for CONFIG_SLUB_TINY the goal is minimal memory overhead, not scalability. Thus, #ifdef out the whole kmem_cache_cpu percpu structure and associated code. Additionally to the slab page savings, this reduces percpu allocator usage, and code size. This change builds on recent commit c7323a5ad078 ("mm/slub: restrict sysfs validation to debug caches and make it safe"), as caches with enabled debugging also avoid percpu slabs and all allocations and freeing ends up working with the partial list. With a bit more refactoring by the preceding patches, use the same code paths with CONFIG_SLUB_TINY. Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- include/linux/slub_def.h | 4 ++ mm/slub.c | 102 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 103 insertions(+), 3 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 95b67863d5cf..aa0ee1678d29 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -41,6 +41,7 @@ enum stat_item { CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ NR_SLUB_STAT_ITEMS }; +#ifndef CONFIG_SLUB_TINY /* * When changing the layout, make sure freelist and tid are still compatible * with this_cpu_cmpxchg_double() alignment requirements. @@ -57,6 +58,7 @@ struct kmem_cache_cpu { unsigned stat[NR_SLUB_STAT_ITEMS]; #endif }; +#endif /* CONFIG_SLUB_TINY */ #ifdef CONFIG_SLUB_CPU_PARTIAL #define slub_percpu_partial(c) ((c)->partial) @@ -88,7 +90,9 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { +#ifndef CONFIG_SLUB_TINY struct kmem_cache_cpu __percpu *cpu_slab; +#endif /* Used for retrieving partial slabs, etc. */ slab_flags_t flags; unsigned long min_partial; diff --git a/mm/slub.c b/mm/slub.c index 88f0ce49caab..00d921bd5417 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -337,10 +337,12 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) */ static nodemask_t slab_nodes; +#ifndef CONFIG_SLUB_TINY /* * Workqueue used for flush_cpu_slab(). */ static struct workqueue_struct *flushwq; +#endif /******************************************************************** * Core slab cache functions @@ -386,10 +388,12 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) return freelist_dereference(s, object + s->offset); } +#ifndef CONFIG_SLUB_TINY static void prefetch_freepointer(const struct kmem_cache *s, void *object) { prefetchw(object + s->offset); } +#endif /* * When running under KMSAN, get_freepointer_safe() may return an uninitialized @@ -1681,11 +1685,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} +#ifndef CONFIG_SLUB_TINY static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, void **freelist, void *nextfree) { return false; } +#endif #endif /* CONFIG_SLUB_DEBUG */ /* @@ -2219,7 +2225,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, if (!pfmemalloc_match(slab, pc->flags)) continue; - if (kmem_cache_debug(s)) { + if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { object = alloc_single_from_partial(s, n, slab, pc->orig_size); if (object) @@ -2334,6 +2340,8 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context return get_any_partial(s, pc); } +#ifndef CONFIG_SLUB_TINY + #ifdef CONFIG_PREEMPTION /* * Calculate the next globally unique transaction for disambiguation @@ -2347,7 +2355,7 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context * different cpus. */ #define TID_STEP 1 -#endif +#endif /* CONFIG_PREEMPTION */ static inline unsigned long next_tid(unsigned long tid) { @@ -2808,6 +2816,13 @@ static int slub_cpu_dead(unsigned int cpu) return 0; } +#else /* CONFIG_SLUB_TINY */ +static inline void flush_all_cpus_locked(struct kmem_cache *s) { } +static inline void flush_all(struct kmem_cache *s) { } +static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { } +static inline int slub_cpu_dead(unsigned int cpu) { return 0; } +#endif /* CONFIG_SLUB_TINY */ + /* * Check if the objects in a per cpu structure fit numa * locality expectations. @@ -2955,6 +2970,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) return true; } +#ifndef CONFIG_SLUB_TINY /* * Check the slab->freelist and either transfer the freelist to the * per cpu freelist or deactivate the slab. @@ -3320,6 +3336,33 @@ redo: return object; } +#else /* CONFIG_SLUB_TINY */ +static void *__slab_alloc_node(struct kmem_cache *s, + gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) +{ + struct partial_context pc; + struct slab *slab; + void *object; + + pc.flags = gfpflags; + pc.slab = &slab; + pc.orig_size = orig_size; + object = get_partial(s, node, &pc); + + if (object) + return object; + + slab = new_slab(s, gfpflags, node); + if (unlikely(!slab)) { + slab_out_of_memory(s, gfpflags, node); + return NULL; + } + + object = alloc_single_from_new_slab(s, slab, orig_size); + + return object; +} +#endif /* CONFIG_SLUB_TINY */ /* * If the object has been wiped upon free, make sure it's fully initialized by @@ -3503,7 +3546,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, if (kfence_free(head)) return; - if (kmem_cache_debug(s)) { + if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { free_to_partial_list(s, slab, head, tail, cnt, addr); return; } @@ -3604,6 +3647,7 @@ slab_empty: discard_slab(s, slab); } +#ifndef CONFIG_SLUB_TINY /* * Fastpath with forced inlining to produce a kfree and kmem_cache_free that * can perform fastpath freeing without additional function calls. @@ -3678,6 +3722,16 @@ redo: } stat(s, FREE_FASTPATH); } +#else /* CONFIG_SLUB_TINY */ +static void do_slab_free(struct kmem_cache *s, + struct slab *slab, void *head, void *tail, + int cnt, unsigned long addr) +{ + void *tail_obj = tail ? : head; + + __slab_free(s, slab, head, tail_obj, cnt, addr); +} +#endif /* CONFIG_SLUB_TINY */ static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, void **p, int cnt, @@ -3812,6 +3866,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); +#ifndef CONFIG_SLUB_TINY static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p, struct obj_cgroup *objcg) { @@ -3880,6 +3935,36 @@ error: return 0; } +#else /* CONFIG_SLUB_TINY */ +static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, + size_t size, void **p, struct obj_cgroup *objcg) +{ + int i; + + for (i = 0; i < size; i++) { + void *object = kfence_alloc(s, s->object_size, flags); + + if (unlikely(object)) { + p[i] = object; + continue; + } + + p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE, + _RET_IP_, s->object_size); + if (unlikely(!p[i])) + goto error; + + maybe_wipe_obj_freeptr(s, p[i]); + } + + return i; + +error: + slab_post_alloc_hook(s, objcg, flags, i, p, false); + kmem_cache_free_bulk(s, i, p); + return 0; +} +#endif /* CONFIG_SLUB_TINY */ /* Note that interrupts must be enabled when calling this function. */ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, @@ -4062,6 +4147,7 @@ init_kmem_cache_node(struct kmem_cache_node *n) #endif } +#ifndef CONFIG_SLUB_TINY static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) { BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < @@ -4081,6 +4167,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) return 1; } +#else +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) +{ + return 1; +} +#endif /* CONFIG_SLUB_TINY */ static struct kmem_cache *kmem_cache_node; @@ -4143,7 +4235,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) void __kmem_cache_release(struct kmem_cache *s) { cache_random_seq_destroy(s); +#ifndef CONFIG_SLUB_TINY free_percpu(s->cpu_slab); +#endif free_kmem_cache_nodes(s); } @@ -4920,8 +5014,10 @@ void __init kmem_cache_init(void) void __init kmem_cache_init_late(void) { +#ifndef CONFIG_SLUB_TINY flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0); WARN_ON(!flushwq); +#endif } struct kmem_cache * -- cgit From be784ba861b93c5cd2c0565c5819c290675b50be Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 21 Nov 2022 16:58:39 +0100 Subject: mm, slub: don't aggressively inline with CONFIG_SLUB_TINY SLUB fastpaths use __always_inline to avoid function calls. With CONFIG_SLUB_TINY we would rather save the memory. Add a __fastpath_inline macro that's __always_inline normally but empty with CONFIG_SLUB_TINY. bloat-o-meter results on x86_64 mm/slub.o: add/remove: 3/1 grow/shrink: 1/8 up/down: 865/-1784 (-919) Function old new delta kmem_cache_free 20 281 +261 slab_alloc_node.isra - 245 +245 slab_free.constprop.isra - 231 +231 __kmem_cache_alloc_lru.isra - 128 +128 __kmem_cache_release 88 83 -5 __kmem_cache_create 1446 1436 -10 __kmem_cache_free 271 142 -129 kmem_cache_alloc_node 330 127 -203 kmem_cache_free_bulk.part 826 613 -213 __kmem_cache_alloc_node 230 10 -220 kmem_cache_alloc_lru 325 12 -313 kmem_cache_alloc 325 10 -315 kmem_cache_free.part 376 - -376 Total: Before=26103, After=25184, chg -3.52% Signed-off-by: Vlastimil Babka Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> --- mm/slub.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 00d921bd5417..ac9e4a15fa32 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -187,6 +187,12 @@ do { \ #define USE_LOCKLESS_FAST_PATH() (false) #endif +#ifndef CONFIG_SLUB_TINY +#define __fastpath_inline __always_inline +#else +#define __fastpath_inline +#endif + #ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG_ON DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); @@ -3386,7 +3392,7 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, * * Otherwise we can simply pick the next object from the lockless free list. */ -static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru, +static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags, int node, unsigned long addr, size_t orig_size) { void *object; @@ -3412,13 +3418,13 @@ out: return object; } -static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru, +static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags, unsigned long addr, size_t orig_size) { return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size); } -static __always_inline +static __fastpath_inline void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) { @@ -3733,7 +3739,7 @@ static void do_slab_free(struct kmem_cache *s, } #endif /* CONFIG_SLUB_TINY */ -static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, +static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, void **p, int cnt, unsigned long addr) { -- cgit From 149b6fa228eda1d191abc440af7162264d716d90 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 11 Nov 2022 11:04:55 +0100 Subject: mm, slob: rename CONFIG_SLOB to CONFIG_SLOB_DEPRECATED As explained in [1], we would like to remove SLOB if possible. - There are no known users that need its somewhat lower memory footprint so much that they cannot handle SLUB (after some modifications by the previous patches) instead. - It is an extra maintenance burden, and a number of features are incompatible with it. - It blocks the API improvement of allowing kfree() on objects allocated via kmem_cache_alloc(). As the first step, rename the CONFIG_SLOB option in the slab allocator configuration choice to CONFIG_SLOB_DEPRECATED. Add CONFIG_SLOB depending on CONFIG_SLOB_DEPRECATED as an internal option to avoid code churn. This will cause existing .config files and defconfigs with CONFIG_SLOB=y to silently switch to the default (and recommended replacement) SLUB, while still allowing SLOB to be configured by anyone that notices and needs it. But those should contact the slab maintainers and linux-mm@kvack.org as explained in the updated help. With no valid objections, the plan is to update the existing defconfigs to SLUB and remove SLOB in a few cycles. To make SLUB more suitable replacement for SLOB, a CONFIG_SLUB_TINY option was introduced to limit SLUB's memory overhead. There is a number of defconfigs specifying CONFIG_SLOB=y. As part of this patch, update them to select CONFIG_SLUB and CONFIG_SLUB_TINY. [1] https://lore.kernel.org/all/b35c3f82-f67b-2103-7d82-7a7ba7521439@suse.cz/ Cc: Russell King Cc: Aaro Koskinen Cc: Janusz Krzysztofik Cc: Tony Lindgren Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: Yoshinori Sato Cc: Rich Felker Cc: Arnd Bergmann Cc: Josh Triplett Cc: Conor Dooley Cc: Damien Le Moal Cc: Christophe Leroy Cc: Geert Uytterhoeven Signed-off-by: Vlastimil Babka Acked-by: Aaro Koskinen # OMAP1 Reviewed-by: Damien Le Moal # riscv k210 Acked-by: Arnd Bergmann # arm Acked-by: Roman Gushchin Acked-by: Mike Rapoport Reviewed-by: Christoph Lameter --- arch/arm/configs/clps711x_defconfig | 3 ++- arch/arm/configs/collie_defconfig | 3 ++- arch/arm/configs/multi_v4t_defconfig | 3 ++- arch/arm/configs/omap1_defconfig | 3 ++- arch/arm/configs/pxa_defconfig | 3 ++- arch/arm/configs/tct_hammer_defconfig | 3 ++- arch/arm/configs/xcep_defconfig | 3 ++- arch/openrisc/configs/or1ksim_defconfig | 3 ++- arch/openrisc/configs/simple_smp_defconfig | 3 ++- arch/riscv/configs/nommu_k210_defconfig | 3 ++- arch/riscv/configs/nommu_k210_sdcard_defconfig | 3 ++- arch/riscv/configs/nommu_virt_defconfig | 3 ++- arch/sh/configs/rsk7201_defconfig | 3 ++- arch/sh/configs/rsk7203_defconfig | 3 ++- arch/sh/configs/se7206_defconfig | 3 ++- arch/sh/configs/shmin_defconfig | 3 ++- arch/sh/configs/shx3_defconfig | 3 ++- kernel/configs/tiny.config | 5 +++-- mm/Kconfig | 17 +++++++++++++++-- 19 files changed, 52 insertions(+), 21 deletions(-) diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig index 92481b2a88fa..adcee238822a 100644 --- a/arch/arm/configs/clps711x_defconfig +++ b/arch/arm/configs/clps711x_defconfig @@ -14,7 +14,8 @@ CONFIG_ARCH_EDB7211=y CONFIG_ARCH_P720T=y CONFIG_AEABI=y # CONFIG_COREDUMP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 2a2d2cb3ce2e..69341c33e0cc 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -13,7 +13,8 @@ CONFIG_CMDLINE="noinitrd root=/dev/mtdblock2 rootfstype=jffs2 fbcon=rotate:1" CONFIG_FPE_NWFPE=y CONFIG_PM=y # CONFIG_SWAP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig index e2fd822f741a..b60000a89aff 100644 --- a/arch/arm/configs/multi_v4t_defconfig +++ b/arch/arm/configs/multi_v4t_defconfig @@ -25,7 +25,8 @@ CONFIG_ARM_CLPS711X_CPUIDLE=y CONFIG_JUMP_LABEL=y CONFIG_PARTITION_ADVANCED=y # CONFIG_COREDUMP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 70511fe4b3ec..246f1bba7df5 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -42,7 +42,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y # CONFIG_SWAP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index d60cc9cc4c21..0a0f12df40b5 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -49,7 +49,8 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_LDM_PARTITION=y CONFIG_CMDLINE_PARTITION=y CONFIG_BINFMT_MISC=y -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig index 3b29ae1fb750..6bd38b6f22c4 100644 --- a/arch/arm/configs/tct_hammer_defconfig +++ b/arch/arm/configs/tct_hammer_defconfig @@ -19,7 +19,8 @@ CONFIG_FPE_NWFPE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_SWAP is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig index ea59e4b6bfc5..6bd9f71b71fc 100644 --- a/arch/arm/configs/xcep_defconfig +++ b/arch/arm/configs/xcep_defconfig @@ -26,7 +26,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLOCK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_COMPAT_BRK is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_NET=y diff --git a/arch/openrisc/configs/or1ksim_defconfig b/arch/openrisc/configs/or1ksim_defconfig index 6e1e004047c7..0116e465238f 100644 --- a/arch/openrisc/configs/or1ksim_defconfig +++ b/arch/openrisc/configs/or1ksim_defconfig @@ -10,7 +10,8 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_MODULES=y # CONFIG_BLOCK is not set CONFIG_OPENRISC_BUILTIN_DTB="or1ksim" diff --git a/arch/openrisc/configs/simple_smp_defconfig b/arch/openrisc/configs/simple_smp_defconfig index ff49d868e040..b990cb6c9309 100644 --- a/arch/openrisc/configs/simple_smp_defconfig +++ b/arch/openrisc/configs/simple_smp_defconfig @@ -16,7 +16,8 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_MODULES=y # CONFIG_BLOCK is not set CONFIG_OPENRISC_BUILTIN_DTB="simple_smp" diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 96fe8def644c..79b3ccd58ff0 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -25,7 +25,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y CONFIG_NONPORTABLE=y diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 379740654373..6b80bb13b8ed 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -17,7 +17,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y CONFIG_NONPORTABLE=y diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index 1a56eda5ce46..4cf0f297091e 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -22,7 +22,8 @@ CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_MMU is not set CONFIG_SOC_VIRT=y CONFIG_NONPORTABLE=y diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig index 619c18699459..376e95fa77bc 100644 --- a/arch/sh/configs/rsk7201_defconfig +++ b/arch/sh/configs/rsk7201_defconfig @@ -10,7 +10,8 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_AIO is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index d00fafc021e1..1d5fd67a3949 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -11,7 +11,8 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 122216123e63..78e0e7be57ee 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -21,7 +21,8 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set # CONFIG_COMPAT_BRK is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index c0b6f40d01cc..e078b193a78a 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -9,7 +9,8 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7706=y CONFIG_MEMORY_START=0x0c000000 diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 32ec6eb1eabc..aa353dff7f19 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -20,7 +20,8 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y -CONFIG_SLOB=y +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_MODULES=y diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config index 8a44b93da0f3..c2f9c912df1c 100644 --- a/kernel/configs/tiny.config +++ b/kernel/configs/tiny.config @@ -7,5 +7,6 @@ CONFIG_KERNEL_XZ=y # CONFIG_KERNEL_LZO is not set # CONFIG_KERNEL_LZ4 is not set # CONFIG_SLAB is not set -# CONFIG_SLUB is not set -CONFIG_SLOB=y +# CONFIG_SLOB_DEPRECATED is not set +CONFIG_SLUB=y +CONFIG_SLUB_TINY=y diff --git a/mm/Kconfig b/mm/Kconfig index 6701d72d3037..623d95659ff9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -219,17 +219,30 @@ config SLUB and has enhanced diagnostics. SLUB is the default choice for a slab allocator. -config SLOB +config SLOB_DEPRECATED depends on EXPERT - bool "SLOB (Simple Allocator)" + bool "SLOB (Simple Allocator - DEPRECATED)" depends on !PREEMPT_RT help + Deprecated and scheduled for removal in a few cycles. SLUB + recommended as replacement. CONFIG_SLUB_TINY can be considered + on systems with 16MB or less RAM. + + If you need SLOB to stay, please contact linux-mm@kvack.org and + people listed in the SLAB ALLOCATOR section of MAINTAINERS file, + with your use case. + SLOB replaces the stock allocator with a drastically simpler allocator. SLOB is generally more space efficient but does not perform as well on large systems. endchoice +config SLOB + bool + default y + depends on SLOB_DEPRECATED + config SLUB_TINY bool "Configure SLUB for minimal memory footprint" depends on SLUB && EXPERT -- cgit