From 4d07a037231c985f8c990c9cf1c304bbe31bb764 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:46:59 +0100 Subject: lib/stackdepot: print disabled message only if truly disabled Patch series "stackdepot: allow evicting stack traces", v4. Currently, the stack depot grows indefinitely until it reaches its capacity. Once that happens, the stack depot stops saving new stack traces. This creates a problem for using the stack depot for in-field testing and in production. For such uses, an ideal stack trace storage should: 1. Allow saving fresh stack traces on systems with a large uptime while limiting the amount of memory used to store the traces; 2. Have a low performance impact. Implementing #1 in the stack depot is impossible with the current keep-forever approach. This series targets to address that. Issue #2 is left to be addressed in a future series. This series changes the stack depot implementation to allow evicting unneeded stack traces from the stack depot. The users of the stack depot can do that via new stack_depot_save_flags(STACK_DEPOT_FLAG_GET) and stack_depot_put APIs. Internal changes to the stack depot code include: 1. Storing stack traces in fixed-frame-sized slots (vs precisely-sized slots in the current implementation); the slot size is controlled via CONFIG_STACKDEPOT_MAX_FRAMES (default: 64 frames); 2. Keeping available slots in a freelist (vs keeping an offset to the next free slot); 3. Using a read/write lock for synchronization (vs a lock-free approach combined with a spinlock). This series also integrates the eviction functionality into KASAN: the tag-based modes evict stack traces when the corresponding entry leaves the stack ring, and Generic KASAN evicts stack traces for objects once those leave the quarantine. With KASAN, despite wasting some space on rounding up the size of each stack record, the total memory consumed by stack depot gets saturated due to the eviction of irrelevant stack traces from the stack depot. With the tag-based KASAN modes, the average total amount of memory used for stack traces becomes ~0.5 MB (with the current default stack ring size of 32k entries and the default CONFIG_STACKDEPOT_MAX_FRAMES of 64). With Generic KASAN, the stack traces take up ~1 MB per 1 GB of RAM (as the quarantine's size depends on the amount of RAM). However, with KMSAN, the stack depot ends up using ~4x more memory per a stack trace than before. Thus, for KMSAN, the stack depot capacity is increased accordingly. KMSAN uses a lot of RAM for shadow memory anyway, so the increased stack depot memory usage will not make a significant difference. Other users of the stack depot do not save stack traces as often as KASAN and KMSAN. Thus, the increased memory usage is taken as an acceptable trade-off. In the future, these other users can take advantage of the eviction API to limit the memory waste. There is no measurable boot time performance impact of these changes for KASAN on x86-64. I haven't done any tests for arm64 modes (the stack depot without performance optimizations is not suitable for intended use of those anyway), but I expect a similar result. Obtaining and copying stack trace frames when saving them into stack depot is what takes the most time. This series does not yet provide a way to configure the maximum size of the stack depot externally (e.g. via a command-line parameter). This will be added in a separate series, possibly together with the performance improvement changes. 
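As a rough illustration of the intended usage (not part of the series itself), a caller could pair the new save and eviction APIs as sketched below. The helper names are made up for this example; stack_depot_save_flags(), STACK_DEPOT_FLAG_GET and stack_depot_put() are the interfaces introduced by later patches in this series.

#include <linux/gfp.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/* Illustrative only: save a pinned stack trace for some tracked object. */
static depot_stack_handle_t example_save_stack(gfp_t gfp)
{
        unsigned long entries[16];
        unsigned int nr_entries;

        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        /* STACK_DEPOT_FLAG_GET takes a reference on the stored record. */
        return stack_depot_save_flags(entries, nr_entries, gfp,
                                      STACK_DEPOT_FLAG_CAN_ALLOC |
                                      STACK_DEPOT_FLAG_GET);
}

/* Illustrative only: drop the reference once the object goes away. */
static void example_put_stack(depot_stack_handle_t handle)
{
        if (handle)
                stack_depot_put(handle);
}

Once all references to a record are dropped, the record can be evicted and its slot reused, which is what bounds the depot's memory use for users such as the KASAN stack ring.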
This patch (of 22): Currently, if stack_depot_disable=off is passed to the kernel command-line after stack_depot_disable=on, stack depot prints a message that it is disabled, while it is actually enabled. Fix this by moving printing the disabled message to stack_depot_early_init. Place it before the __stack_depot_early_init_requested check, so that the message is printed even if early stack depot init has not been requested. Also drop the stack_table = NULL assignment from disable_stack_depot, as stack_table is NULL by default. Link: https://lkml.kernel.org/r/cover.1700502145.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/73a25c5fff29f3357cd7a9330e85e09bc8da2cbe.1700502145.git.andreyknvl@google.com Fixes: e1fdc403349c ("lib: stackdepot: add support to disable stack depot") Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 2f5aa851834e..0eeaef4f2523 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -101,14 +101,7 @@ static int next_pool_required = 1; static int __init disable_stack_depot(char *str) { - int ret; - - ret = kstrtobool(str, &stack_depot_disabled); - if (!ret && stack_depot_disabled) { - pr_info("disabled\n"); - stack_table = NULL; - } - return 0; + return kstrtobool(str, &stack_depot_disabled); } early_param("stack_depot_disable", disable_stack_depot); @@ -130,6 +123,15 @@ int __init stack_depot_early_init(void) return 0; __stack_depot_early_init_passed = true; + /* + * Print disabled message even if early init has not been requested: + * stack_depot_init() will not print one. + */ + if (stack_depot_disabled) { + pr_info("disabled\n"); + return 0; + } + /* * If KASAN is enabled, use the maximum order: KASAN is frequently used * in fuzzing scenarios, which leads to a large number of different @@ -138,7 +140,11 @@ int __init stack_depot_early_init(void) if (kasan_enabled() && !stack_bucket_number_order) stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; - if (!__stack_depot_early_init_requested || stack_depot_disabled) + /* + * Check if early init has been requested after setting + * stack_bucket_number_order: stack_depot_init() uses its value. + */ + if (!__stack_depot_early_init_requested) return 0; /* -- cgit From 0c5d44a8142d1ede05943845793d3d8a2f10c338 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:00 +0100 Subject: lib/stackdepot: check disabled flag when fetching Do not try fetching a stack trace from the stack depot if the stack_depot_disabled flag is enabled. 
Link: https://lkml.kernel.org/r/c3bfa3b7ab00b2e48ab75a3fbb9c67555777cb08.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 0eeaef4f2523..f8a8033e1dc8 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -483,7 +483,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, */ kmsan_unpoison_memory(entries, sizeof(*entries)); - if (!handle) + if (!handle || stack_depot_disabled) return 0; if (parts.pool_index > pool_index_cached) { -- cgit From 603c000c115b40be75063af1a1e75a3b40d3a523 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:01 +0100 Subject: lib/stackdepot: simplify __stack_depot_save The retval local variable in __stack_depot_save has the union type handle_parts, but the function never uses anything but the union's handle field. Define retval simply as depot_stack_handle_t to simplify the code. Link: https://lkml.kernel.org/r/3b0763c8057a1cf2f200ff250a5f9580ee36a28c.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f8a8033e1dc8..3e71c8f61c7d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -366,7 +366,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, gfp_t alloc_flags, bool can_alloc) { struct stack_record *found = NULL, **bucket; - union handle_parts retval = { .handle = 0 }; + depot_stack_handle_t handle = 0; struct page *page = NULL; void *prealloc = NULL; unsigned long flags; @@ -383,7 +383,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, nr_entries = filter_irq_stacks(entries, nr_entries); if (unlikely(nr_entries == 0) || stack_depot_disabled) - goto fast_exit; + return 0; hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & stack_hash_mask]; @@ -449,9 +449,8 @@ exit: free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); } if (found) - retval.handle = found->handle.handle; -fast_exit: - return retval.handle; + handle = found->handle.handle; + return handle; } EXPORT_SYMBOL_GPL(__stack_depot_save); -- cgit From 5f9ce55e020742e3c86a06941fbe9f37f9c022dd Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:02 +0100 Subject: lib/stackdepot: drop valid bit from handles Stack depot doesn't use the valid bit in handles in any way, so drop it. 
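To see where the freed bit goes, here is the handle bit budget after this change, assuming 4 KB pages (PAGE_SHIFT == 12) and the in-tree STACK_DEPOT_EXTRA_BITS value of 5 (both assumptions of this back-of-the-envelope calculation, not part of the patch):

        DEPOT_OFFSET_BITS     = DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN
                              = 2 + 12 - 4  = 10
        DEPOT_POOL_INDEX_BITS = 32 - 10 - 5 = 17   (was 16 with the valid bit)

The pool index thus gains one bit; the number of pools actually used remains capped by DEPOT_POOLS_CAP (8192).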
Link: https://lkml.kernel.org/r/34969bba2ca6e012c6ad071767197dee64dc5723.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 3e71c8f61c7d..46a422d31c1f 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -32,13 +32,12 @@ #define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) -#define DEPOT_VALID_BITS 1 #define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ #define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) #define DEPOT_STACK_ALIGN 4 #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) -#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_VALID_BITS - \ - DEPOT_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ + STACK_DEPOT_EXTRA_BITS) #define DEPOT_POOLS_CAP 8192 #define DEPOT_MAX_POOLS \ (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ @@ -50,7 +49,6 @@ union handle_parts { struct { u32 pool_index : DEPOT_POOL_INDEX_BITS; u32 offset : DEPOT_OFFSET_BITS; - u32 valid : DEPOT_VALID_BITS; u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; @@ -309,7 +307,6 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->size = size; stack->handle.pool_index = pool_index; stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; - stack->handle.valid = 1; stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); pool_offset += required_size; -- cgit From 83130ab2d8a49e86c70d628d1446a84c8e6ad1a4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:03 +0100 Subject: lib/stackdepot: add depot_fetch_stack helper Add a helper depot_fetch_stack function that fetches the pointer to a stack record. With this change, all static depot_* functions now operate on stack pools and the exported stack_depot_* functions operate on the hash table. Link: https://lkml.kernel.org/r/170d8c202f29dc8e3d5491ee074d1e9e029a46db.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 46a422d31c1f..e41713983cac 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -310,6 +310,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); pool_offset += required_size; + /* * Let KMSAN know the stored stack record is initialized. This shall * prevent false positive reports if instrumented code accesses it. @@ -319,6 +320,32 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return stack; } +static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) +{ + union handle_parts parts = { .handle = handle }; + /* + * READ_ONCE pairs with potential concurrent write in + * depot_alloc_stack(). 
+ */ + int pool_index_cached = READ_ONCE(pool_index); + void *pool; + size_t offset = parts.offset << DEPOT_STACK_ALIGN; + struct stack_record *stack; + + if (parts.pool_index > pool_index_cached) { + WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", + parts.pool_index, pool_index_cached, handle); + return NULL; + } + + pool = stack_pools[parts.pool_index]; + if (!pool) + return NULL; + + stack = pool + offset; + return stack; +} + /* Calculates the hash for a stack. */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) { @@ -462,14 +489,6 @@ EXPORT_SYMBOL_GPL(stack_depot_save); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { - union handle_parts parts = { .handle = handle }; - /* - * READ_ONCE pairs with potential concurrent write in - * depot_alloc_stack. - */ - int pool_index_cached = READ_ONCE(pool_index); - void *pool; - size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; *entries = NULL; @@ -482,15 +501,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle || stack_depot_disabled) return 0; - if (parts.pool_index > pool_index_cached) { - WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pool_index_cached, handle); - return 0; - } - pool = stack_pools[parts.pool_index]; - if (!pool) - return 0; - stack = pool + offset; + stack = depot_fetch_stack(handle); *entries = stack->entries; return stack->size; -- cgit From fc60e0caa94dd7ca0e97a1d42527f71c9d51cd2d Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:04 +0100 Subject: lib/stackdepot: use fixed-sized slots for stack records Instead of storing stack records in stack depot pools one right after another, use fixed-sized slots. Add a new Kconfig option STACKDEPOT_MAX_FRAMES that allows to select the size of the slot in frames. Use 64 as the default value, which is the maximum stack trace size both KASAN and KMSAN use right now. Also add descriptions for other stack depot Kconfig options. This is preparatory patch for implementing the eviction of stack records from the stack depot. 
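For a sense of the resulting slot size, a rough calculation on a 64-bit kernel with 4 KB pages and the default CONFIG_STACKDEPOT_MAX_FRAMES of 64 (approximate; the exact header size depends on structure layout):

        entries:    64 frames * 8 bytes                   = 512 bytes
        header:     next + hash + size + handle + padding ~  24 bytes
        slot size:  ALIGN(536, 1 << DEPOT_STACK_ALIGN)    ~ 544 bytes
        per pool:   DEPOT_POOL_SIZE / 544 = 16384 / 544   ~  30 records

This is the rounding-up overhead the cover letter refers to: short stack traces now occupy a full fixed-size slot.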
Link: https://lkml.kernel.org/r/dce7d030a99ff61022509665187fac45b0827298.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index e41713983cac..682497dbe081 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -58,9 +58,12 @@ struct stack_record { u32 hash; /* Hash in the hash table */ u32 size; /* Number of stored frames */ union handle_parts handle; - unsigned long entries[]; /* Variable-sized array of frames */ + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ }; +#define DEPOT_STACK_RECORD_SIZE \ + ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN) + static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; @@ -264,9 +267,7 @@ static struct stack_record * depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; - size_t required_size = struct_size(stack, entries, size); - - required_size = ALIGN(required_size, 1 << DEPOT_STACK_ALIGN); + size_t required_size = DEPOT_STACK_RECORD_SIZE; /* Check if there is not enough space in the current pool. */ if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) { @@ -301,6 +302,10 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) if (stack_pools[pool_index] == NULL) return NULL; + /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ + if (size > CONFIG_STACKDEPOT_MAX_FRAMES) + size = CONFIG_STACKDEPOT_MAX_FRAMES; + /* Save the stack trace. */ stack = stack_pools[pool_index] + pool_offset; stack->hash = hash; -- cgit From fcccc41ecb0c96e59c471c389cd708014be2efc8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:05 +0100 Subject: lib/stackdepot: fix and clean-up atomic annotations Drop smp_load_acquire from next_pool_required in depot_init_pool, as both depot_init_pool and the all smp_store_release's to this variable are executed under the stack depot lock. Also simplify and clean up comments accompanying the use of atomic accesses in the stack depot code. Link: https://lkml.kernel.org/r/c118ef044d8db80248d9e1f14592c72e8429e9d9.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 682497dbe081..cfa3c6c7cc2e 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -231,10 +231,10 @@ static void depot_init_pool(void **prealloc) /* * If the next pool is already initialized or the maximum number of * pools is reached, do not use the preallocated memory. - * smp_load_acquire() here pairs with smp_store_release() below and - * in depot_alloc_stack(). + * Access next_pool_required non-atomically, as there are no concurrent + * write accesses to this variable. 
*/ - if (!smp_load_acquire(&next_pool_required)) + if (!next_pool_required) return; /* Check if the current pool is not yet allocated. */ @@ -255,8 +255,8 @@ static void depot_init_pool(void **prealloc) * At this point, either the next pool is initialized or the * maximum number of pools is reached. In either case, take * note that initializing another pool is not required. - * This smp_store_release pairs with smp_load_acquire() above - * and in stack_depot_save(). + * smp_store_release() pairs with smp_load_acquire() in + * stack_depot_save(). */ smp_store_release(&next_pool_required, 0); } @@ -279,7 +279,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* * Move on to the next pool. - * WRITE_ONCE pairs with potential concurrent read in + * WRITE_ONCE() pairs with potential concurrent read in * stack_depot_fetch(). */ WRITE_ONCE(pool_index, pool_index + 1); @@ -287,8 +287,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* * If the maximum number of pools is not reached, take note * that the next pool needs to initialized. - * smp_store_release() here pairs with smp_load_acquire() in - * stack_depot_save() and depot_init_pool(). + * smp_store_release() pairs with smp_load_acquire() in + * stack_depot_save(). */ if (pool_index + 1 < DEPOT_MAX_POOLS) smp_store_release(&next_pool_required, 1); @@ -329,7 +329,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) { union handle_parts parts = { .handle = handle }; /* - * READ_ONCE pairs with potential concurrent write in + * READ_ONCE() pairs with potential concurrent write in * depot_alloc_stack(). */ int pool_index_cached = READ_ONCE(pool_index); @@ -419,8 +419,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, /* * Fast path: look the stack trace up without locking. - * The smp_load_acquire() here pairs with smp_store_release() to - * |bucket| below. + * smp_load_acquire() pairs with smp_store_release() to |bucket| below. */ found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash); if (found) @@ -430,8 +429,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * Check if another stack pool needs to be initialized. If so, allocate * the memory now - we won't be able to do that under the lock. * - * The smp_load_acquire() here pairs with smp_store_release() to - * |next_pool_inited| in depot_alloc_stack() and depot_init_pool(). + * smp_load_acquire() pairs with smp_store_release() in + * depot_alloc_stack() and depot_init_pool(). */ if (unlikely(can_alloc && smp_load_acquire(&next_pool_required))) { /* @@ -457,8 +456,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, if (new) { new->next = *bucket; /* - * This smp_store_release() pairs with - * smp_load_acquire() from |bucket| above. + * smp_store_release() pairs with smp_load_acquire() + * from |bucket| above. */ smp_store_release(bucket, new); found = new; -- cgit From 94b7d32870298be93b67bceb0470936c54fb2007 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:06 +0100 Subject: lib/stackdepot: rework helpers for depot_alloc_stack Split code in depot_alloc_stack and depot_init_pool into 3 functions: 1. depot_keep_next_pool that keeps preallocated memory for the next pool if required. 2. depot_update_pools that moves on to the next pool if there's no space left in the current pool, uses preallocated memory for the new current pool if required, and calls depot_keep_next_pool otherwise. 3. 
depot_alloc_stack that calls depot_update_pools and then allocates a stack record as before. This makes it somewhat easier to follow the logic of depot_alloc_stack and also serves as a preparation for implementing the eviction of stack records from the stack depot. Link: https://lkml.kernel.org/r/71fb144d42b701fcb46708d7f4be6801a4a8270e.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 86 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 37 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index cfa3c6c7cc2e..b3af868627f4 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -225,11 +225,11 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -/* Uses preallocated memory to initialize a new stack depot pool. */ -static void depot_init_pool(void **prealloc) +/* Keeps the preallocated memory to be used for the next stack depot pool. */ +static void depot_keep_next_pool(void **prealloc) { /* - * If the next pool is already initialized or the maximum number of + * If the next pool is already saved or the maximum number of * pools is reached, do not use the preallocated memory. * Access next_pool_required non-atomically, as there are no concurrent * write accesses to this variable. @@ -237,44 +237,34 @@ static void depot_init_pool(void **prealloc) if (!next_pool_required) return; - /* Check if the current pool is not yet allocated. */ - if (stack_pools[pool_index] == NULL) { - /* Use the preallocated memory for the current pool. */ - stack_pools[pool_index] = *prealloc; + /* + * Use the preallocated memory for the next pool + * as long as we do not exceed the maximum number of pools. + */ + if (pool_index + 1 < DEPOT_MAX_POOLS) { + stack_pools[pool_index + 1] = *prealloc; *prealloc = NULL; - } else { - /* - * Otherwise, use the preallocated memory for the next pool - * as long as we do not exceed the maximum number of pools. - */ - if (pool_index + 1 < DEPOT_MAX_POOLS) { - stack_pools[pool_index + 1] = *prealloc; - *prealloc = NULL; - } - /* - * At this point, either the next pool is initialized or the - * maximum number of pools is reached. In either case, take - * note that initializing another pool is not required. - * smp_store_release() pairs with smp_load_acquire() in - * stack_depot_save(). - */ - smp_store_release(&next_pool_required, 0); } + + /* + * At this point, either the next pool is kept or the maximum + * number of pools is reached. In either case, take note that + * keeping another pool is not required. + * smp_store_release() pairs with smp_load_acquire() in + * stack_depot_save(). + */ + smp_store_release(&next_pool_required, 0); } -/* Allocates a new stack in a stack depot pool. */ -static struct stack_record * -depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +/* Updates references to the current and the next stack depot pools. */ +static bool depot_update_pools(size_t required_size, void **prealloc) { - struct stack_record *stack; - size_t required_size = DEPOT_STACK_RECORD_SIZE; - /* Check if there is not enough space in the current pool. */ if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) { /* Bail out if we reached the pool limit. 
*/ if (unlikely(pool_index + 1 >= DEPOT_MAX_POOLS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); - return NULL; + return false; } /* @@ -284,9 +274,10 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) */ WRITE_ONCE(pool_index, pool_index + 1); pool_offset = 0; + /* * If the maximum number of pools is not reached, take note - * that the next pool needs to initialized. + * that the next pool needs to be initialized. * smp_store_release() pairs with smp_load_acquire() in * stack_depot_save(). */ @@ -294,9 +285,30 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) smp_store_release(&next_pool_required, 1); } - /* Assign the preallocated memory to a pool if required. */ + /* Check if the current pool is not yet allocated. */ + if (*prealloc && stack_pools[pool_index] == NULL) { + /* Use the preallocated memory for the current pool. */ + stack_pools[pool_index] = *prealloc; + *prealloc = NULL; + return true; + } + + /* Otherwise, try using the preallocated memory for the next pool. */ if (*prealloc) - depot_init_pool(prealloc); + depot_keep_next_pool(prealloc); + return true; +} + +/* Allocates a new stack in a stack depot pool. */ +static struct stack_record * +depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +{ + struct stack_record *stack; + size_t required_size = DEPOT_STACK_RECORD_SIZE; + + /* Update current and next pools if required and possible. */ + if (!depot_update_pools(required_size, prealloc)) + return NULL; /* Check if we have a pool to save the stack trace. */ if (stack_pools[pool_index] == NULL) @@ -330,7 +342,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) union handle_parts parts = { .handle = handle }; /* * READ_ONCE() pairs with potential concurrent write in - * depot_alloc_stack(). + * depot_update_pools(). */ int pool_index_cached = READ_ONCE(pool_index); void *pool; @@ -430,7 +442,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * the memory now - we won't be able to do that under the lock. * * smp_load_acquire() pairs with smp_store_release() in - * depot_alloc_stack() and depot_init_pool(). + * depot_update_pools() and depot_keep_next_pool(). */ if (unlikely(can_alloc && smp_load_acquire(&next_pool_required))) { /* @@ -467,7 +479,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * Stack depot already contains this stack trace, but let's * keep the preallocated memory for the future. */ - depot_init_pool(&prealloc); + depot_keep_next_pool(&prealloc); } raw_spin_unlock_irqrestore(&pool_lock, flags); -- cgit From b6a353d3ebc2b5eea3cab81ed81764bb1dd6f4ab Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:07 +0100 Subject: lib/stackdepot: rename next_pool_required to new_pool_required Rename next_pool_required to new_pool_required. This a purely code readability change: the following patch will change stack depot to store the pointer to the new pool in a separate variable, and "new" seems like a more logical name. 
Link: https://lkml.kernel.org/r/fd7cd6c6eb250c13ec5d2009d75bb4ddd1470db9.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b3af868627f4..a38661beab97 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -93,12 +93,11 @@ static size_t pool_offset; static DEFINE_RAW_SPINLOCK(pool_lock); /* * Stack depot tries to keep an extra pool allocated even before it runs out - * of space in the currently used pool. - * This flag marks that this next extra pool needs to be allocated and - * initialized. It has the value 0 when either the next pool is not yet - * initialized or the limit on the number of pools is reached. + * of space in the currently used pool. This flag marks whether this extra pool + * needs to be allocated. It has the value 0 when either an extra pool is not + * yet allocated or if the limit on the number of pools is reached. */ -static int next_pool_required = 1; +static int new_pool_required = 1; static int __init disable_stack_depot(char *str) { @@ -225,20 +224,20 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -/* Keeps the preallocated memory to be used for the next stack depot pool. */ -static void depot_keep_next_pool(void **prealloc) +/* Keeps the preallocated memory to be used for a new stack depot pool. */ +static void depot_keep_new_pool(void **prealloc) { /* - * If the next pool is already saved or the maximum number of + * If a new pool is already saved or the maximum number of * pools is reached, do not use the preallocated memory. - * Access next_pool_required non-atomically, as there are no concurrent + * Access new_pool_required non-atomically, as there are no concurrent * write accesses to this variable. */ - if (!next_pool_required) + if (!new_pool_required) return; /* - * Use the preallocated memory for the next pool + * Use the preallocated memory for the new pool * as long as we do not exceed the maximum number of pools. */ if (pool_index + 1 < DEPOT_MAX_POOLS) { @@ -247,13 +246,13 @@ static void depot_keep_next_pool(void **prealloc) } /* - * At this point, either the next pool is kept or the maximum + * At this point, either a new pool is kept or the maximum * number of pools is reached. In either case, take note that * keeping another pool is not required. * smp_store_release() pairs with smp_load_acquire() in * stack_depot_save(). */ - smp_store_release(&next_pool_required, 0); + smp_store_release(&new_pool_required, 0); } /* Updates references to the current and the next stack depot pools. */ @@ -268,7 +267,7 @@ static bool depot_update_pools(size_t required_size, void **prealloc) } /* - * Move on to the next pool. + * Move on to the new pool. * WRITE_ONCE() pairs with potential concurrent read in * stack_depot_fetch(). */ @@ -277,12 +276,12 @@ static bool depot_update_pools(size_t required_size, void **prealloc) /* * If the maximum number of pools is not reached, take note - * that the next pool needs to be initialized. + * that yet another new pool needs to be allocated. * smp_store_release() pairs with smp_load_acquire() in * stack_depot_save(). 
*/ if (pool_index + 1 < DEPOT_MAX_POOLS) - smp_store_release(&next_pool_required, 1); + smp_store_release(&new_pool_required, 1); } /* Check if the current pool is not yet allocated. */ @@ -293,9 +292,9 @@ static bool depot_update_pools(size_t required_size, void **prealloc) return true; } - /* Otherwise, try using the preallocated memory for the next pool. */ + /* Otherwise, try using the preallocated memory for a new pool. */ if (*prealloc) - depot_keep_next_pool(prealloc); + depot_keep_new_pool(prealloc); return true; } @@ -306,7 +305,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) struct stack_record *stack; size_t required_size = DEPOT_STACK_RECORD_SIZE; - /* Update current and next pools if required and possible. */ + /* Update current and new pools if required and possible. */ if (!depot_update_pools(required_size, prealloc)) return NULL; @@ -438,13 +437,13 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, goto exit; /* - * Check if another stack pool needs to be initialized. If so, allocate - * the memory now - we won't be able to do that under the lock. + * Check if another stack pool needs to be allocated. If so, allocate + * the memory now: we won't be able to do that under the lock. * * smp_load_acquire() pairs with smp_store_release() in - * depot_update_pools() and depot_keep_next_pool(). + * depot_update_pools() and depot_keep_new_pool(). */ - if (unlikely(can_alloc && smp_load_acquire(&next_pool_required))) { + if (unlikely(can_alloc && smp_load_acquire(&new_pool_required))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -477,9 +476,9 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, } else if (prealloc) { /* * Stack depot already contains this stack trace, but let's - * keep the preallocated memory for the future. + * keep the preallocated memory for future. */ - depot_keep_next_pool(&prealloc); + depot_keep_new_pool(&prealloc); } raw_spin_unlock_irqrestore(&pool_lock, flags); -- cgit From a5d21f71715a0459e5313881203f86eefbeefb3b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:08 +0100 Subject: lib/stackdepot: store next pool pointer in new_pool Instead of using the last pointer in stack_pools for storing the pointer to a new pool (which does not yet store any stack records), use a new new_pool variable. This a purely code readability change: it seems more logical to store the pointer to a pool with a special meaning in a dedicated variable. Link: https://lkml.kernel.org/r/448bc18296c16bef95cb3167697be6583dcc8ce3.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a38661beab97..68c1ac9aa916 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -85,6 +85,8 @@ static unsigned int stack_hash_mask; /* Array of memory regions that store stack traces. */ static void *stack_pools[DEPOT_MAX_POOLS]; +/* Newly allocated pool that is not yet added to stack_pools. */ +static void *new_pool; /* Currently used pool in stack_pools. */ static int pool_index; /* Offset to the unused space in the currently used pool. 
*/ @@ -241,7 +243,7 @@ static void depot_keep_new_pool(void **prealloc) * as long as we do not exceed the maximum number of pools. */ if (pool_index + 1 < DEPOT_MAX_POOLS) { - stack_pools[pool_index + 1] = *prealloc; + new_pool = *prealloc; *prealloc = NULL; } @@ -272,6 +274,8 @@ static bool depot_update_pools(size_t required_size, void **prealloc) * stack_depot_fetch(). */ WRITE_ONCE(pool_index, pool_index + 1); + stack_pools[pool_index] = new_pool; + new_pool = NULL; pool_offset = 0; /* -- cgit From b29d31885814003245e2e36373bef4ea6721f114 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:09 +0100 Subject: lib/stackdepot: store free stack records in a freelist Instead of using the global pool_offset variable to find a free slot when storing a new stack record, mainlain a freelist of free slots within the allocated stack pools. A global next_stack variable is used as the head of the freelist, and the next field in the stack_record struct is reused as freelist link (when the record is not in the freelist, this field is used as a link in the hash table). This is preparatory patch for implementing the eviction of stack records from the stack depot. Link: https://lkml.kernel.org/r/b9e4c79955c2121b69301778643b203d3fb09ccc.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 131 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 49 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 68c1ac9aa916..a5eff165c0d5 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -54,8 +54,8 @@ union handle_parts { }; struct stack_record { - struct stack_record *next; /* Link in the hash table */ - u32 hash; /* Hash in the hash table */ + struct stack_record *next; /* Link in hash table or freelist */ + u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ union handle_parts handle; unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ @@ -87,10 +87,10 @@ static unsigned int stack_hash_mask; static void *stack_pools[DEPOT_MAX_POOLS]; /* Newly allocated pool that is not yet added to stack_pools. */ static void *new_pool; -/* Currently used pool in stack_pools. */ -static int pool_index; -/* Offset to the unused space in the currently used pool. */ -static size_t pool_offset; +/* Number of pools in stack_pools. */ +static int pools_num; +/* Next stack in the freelist of stack records within stack_pools. */ +static struct stack_record *next_stack; /* Lock that protects the variables above. */ static DEFINE_RAW_SPINLOCK(pool_lock); /* @@ -226,6 +226,42 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); +/* Initializes a stack depol pool. */ +static void depot_init_pool(void *pool) +{ + const int records_in_pool = DEPOT_POOL_SIZE / DEPOT_STACK_RECORD_SIZE; + int i, offset; + + /* Initialize handles and link stack records to each other. 
*/ + for (i = 0, offset = 0; + offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; + i++, offset += DEPOT_STACK_RECORD_SIZE) { + struct stack_record *stack = pool + offset; + + stack->handle.pool_index = pools_num; + stack->handle.offset = offset >> DEPOT_STACK_ALIGN; + stack->handle.extra = 0; + + if (i < records_in_pool - 1) + stack->next = (void *)stack + DEPOT_STACK_RECORD_SIZE; + else + stack->next = NULL; + } + + /* Link stack records into the freelist. */ + WARN_ON(next_stack); + next_stack = pool; + + /* Save reference to the pool to be used by depot_fetch_stack(). */ + stack_pools[pools_num] = pool; + + /* + * WRITE_ONCE() pairs with potential concurrent read in + * depot_fetch_stack(). + */ + WRITE_ONCE(pools_num, pools_num + 1); +} + /* Keeps the preallocated memory to be used for a new stack depot pool. */ static void depot_keep_new_pool(void **prealloc) { @@ -242,7 +278,7 @@ static void depot_keep_new_pool(void **prealloc) * Use the preallocated memory for the new pool * as long as we do not exceed the maximum number of pools. */ - if (pool_index + 1 < DEPOT_MAX_POOLS) { + if (pools_num < DEPOT_MAX_POOLS) { new_pool = *prealloc; *prealloc = NULL; } @@ -258,45 +294,42 @@ static void depot_keep_new_pool(void **prealloc) } /* Updates references to the current and the next stack depot pools. */ -static bool depot_update_pools(size_t required_size, void **prealloc) +static bool depot_update_pools(void **prealloc) { - /* Check if there is not enough space in the current pool. */ - if (unlikely(pool_offset + required_size > DEPOT_POOL_SIZE)) { - /* Bail out if we reached the pool limit. */ - if (unlikely(pool_index + 1 >= DEPOT_MAX_POOLS)) { - WARN_ONCE(1, "Stack depot reached limit capacity"); - return false; - } + /* Check if we still have objects in the freelist. */ + if (next_stack) + goto out_keep_prealloc; - /* - * Move on to the new pool. - * WRITE_ONCE() pairs with potential concurrent read in - * stack_depot_fetch(). - */ - WRITE_ONCE(pool_index, pool_index + 1); - stack_pools[pool_index] = new_pool; + /* Check if we have a new pool saved and use it. */ + if (new_pool) { + depot_init_pool(new_pool); new_pool = NULL; - pool_offset = 0; - /* - * If the maximum number of pools is not reached, take note - * that yet another new pool needs to be allocated. - * smp_store_release() pairs with smp_load_acquire() in - * stack_depot_save(). - */ - if (pool_index + 1 < DEPOT_MAX_POOLS) + /* Take note that we might need a new new_pool. */ + if (pools_num < DEPOT_MAX_POOLS) smp_store_release(&new_pool_required, 1); + + /* Try keeping the preallocated memory for new_pool. */ + goto out_keep_prealloc; + } + + /* Bail out if we reached the pool limit. */ + if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return false; } - /* Check if the current pool is not yet allocated. */ - if (*prealloc && stack_pools[pool_index] == NULL) { - /* Use the preallocated memory for the current pool. */ - stack_pools[pool_index] = *prealloc; + /* Check if we have preallocated memory and use it. */ + if (*prealloc) { + depot_init_pool(*prealloc); *prealloc = NULL; return true; } - /* Otherwise, try using the preallocated memory for a new pool. */ + return false; + +out_keep_prealloc: + /* Keep the preallocated memory for a new pool if required. 
*/ if (*prealloc) depot_keep_new_pool(prealloc); return true; @@ -307,35 +340,35 @@ static struct stack_record * depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; - size_t required_size = DEPOT_STACK_RECORD_SIZE; /* Update current and new pools if required and possible. */ - if (!depot_update_pools(required_size, prealloc)) + if (!depot_update_pools(prealloc)) return NULL; - /* Check if we have a pool to save the stack trace. */ - if (stack_pools[pool_index] == NULL) + /* Check if we have a stack record to save the stack trace. */ + stack = next_stack; + if (!stack) return NULL; + /* Advance the freelist. */ + next_stack = stack->next; + /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) size = CONFIG_STACKDEPOT_MAX_FRAMES; /* Save the stack trace. */ - stack = stack_pools[pool_index] + pool_offset; + stack->next = NULL; stack->hash = hash; stack->size = size; - stack->handle.pool_index = pool_index; - stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; - stack->handle.extra = 0; + /* stack->handle is already filled in by depot_init_pool(). */ memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); - pool_offset += required_size; /* * Let KMSAN know the stored stack record is initialized. This shall * prevent false positive reports if instrumented code accesses it. */ - kmsan_unpoison_memory(stack, required_size); + kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); return stack; } @@ -345,16 +378,16 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) union handle_parts parts = { .handle = handle }; /* * READ_ONCE() pairs with potential concurrent write in - * depot_update_pools(). + * depot_init_pool(). */ - int pool_index_cached = READ_ONCE(pool_index); + int pools_num_cached = READ_ONCE(pools_num); void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - if (parts.pool_index > pool_index_cached) { + if (parts.pool_index > pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pool_index_cached, handle); + parts.pool_index, pools_num_cached, handle); return NULL; } -- cgit From a6cd957021f2bbbe0f02e5c32389eb4c06aa97c8 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:10 +0100 Subject: lib/stackdepot: use read/write lock Currently, stack depot uses the following locking scheme: 1. Lock-free accesses when looking up a stack record, which allows to have multiple users to look up records in parallel; 2. Spinlock for protecting the stack depot pools and the hash table when adding a new record. For implementing the eviction of stack traces from stack depot, the lock-free approach is not going to work anymore, as we will need to be able to also remove records from the hash table. Convert the spinlock into a read/write lock, and drop the atomic accesses, as they are no longer required. Looking up stack traces is now protected by the read lock and adding new records - by the write lock. One of the following patches will add a new function for evicting stack records, which will be protected by the write lock as well. With this change, multiple users can still look up records in parallel. This is preparatory patch for implementing the eviction of stack records from the stack depot. 
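Reduced to its essentials, the locking pattern this patch switches to looks as sketched below (illustrative names and a simplified record; not the exact stack depot code). Lookups take the lock for reading and can run in parallel; inserting a record, and in later patches evicting one, takes it for writing.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_RWLOCK(example_lock);
static LIST_HEAD(example_records);

struct example_record {
        struct list_head list;
        u32 hash;
};

/* Readers can hold the lock concurrently. */
static struct example_record *example_lookup(u32 hash)
{
        struct example_record *rec, *found = NULL;
        unsigned long flags;

        read_lock_irqsave(&example_lock, flags);
        list_for_each_entry(rec, &example_records, list) {
                if (rec->hash == hash) {
                        found = rec;
                        break;
                }
        }
        read_unlock_irqrestore(&example_lock, flags);
        return found;
}

/* Writers (insertion, and later eviction) are exclusive. */
static void example_insert(struct example_record *rec)
{
        unsigned long flags;

        write_lock_irqsave(&example_lock, flags);
        list_add(&rec->list, &example_records);
        write_unlock_irqrestore(&example_lock, flags);
}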
Link: https://lkml.kernel.org/r/9f81ffcc4bb422ebb6326a65a770bf1918634cbb.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 87 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 41 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a5eff165c0d5..8378b32b5310 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -91,15 +92,15 @@ static void *new_pool; static int pools_num; /* Next stack in the freelist of stack records within stack_pools. */ static struct stack_record *next_stack; -/* Lock that protects the variables above. */ -static DEFINE_RAW_SPINLOCK(pool_lock); /* * Stack depot tries to keep an extra pool allocated even before it runs out * of space in the currently used pool. This flag marks whether this extra pool * needs to be allocated. It has the value 0 when either an extra pool is not * yet allocated or if the limit on the number of pools is reached. */ -static int new_pool_required = 1; +static bool new_pool_required = true; +/* Lock that protects the variables above. */ +static DEFINE_RWLOCK(pool_rwlock); static int __init disable_stack_depot(char *str) { @@ -232,6 +233,8 @@ static void depot_init_pool(void *pool) const int records_in_pool = DEPOT_POOL_SIZE / DEPOT_STACK_RECORD_SIZE; int i, offset; + lockdep_assert_held_write(&pool_rwlock); + /* Initialize handles and link stack records to each other. */ for (i = 0, offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; @@ -254,22 +257,17 @@ static void depot_init_pool(void *pool) /* Save reference to the pool to be used by depot_fetch_stack(). */ stack_pools[pools_num] = pool; - - /* - * WRITE_ONCE() pairs with potential concurrent read in - * depot_fetch_stack(). - */ - WRITE_ONCE(pools_num, pools_num + 1); + pools_num++; } /* Keeps the preallocated memory to be used for a new stack depot pool. */ static void depot_keep_new_pool(void **prealloc) { + lockdep_assert_held_write(&pool_rwlock); + /* * If a new pool is already saved or the maximum number of * pools is reached, do not use the preallocated memory. - * Access new_pool_required non-atomically, as there are no concurrent - * write accesses to this variable. */ if (!new_pool_required) return; @@ -287,15 +285,15 @@ static void depot_keep_new_pool(void **prealloc) * At this point, either a new pool is kept or the maximum * number of pools is reached. In either case, take note that * keeping another pool is not required. - * smp_store_release() pairs with smp_load_acquire() in - * stack_depot_save(). */ - smp_store_release(&new_pool_required, 0); + new_pool_required = false; } /* Updates references to the current and the next stack depot pools. */ static bool depot_update_pools(void **prealloc) { + lockdep_assert_held_write(&pool_rwlock); + /* Check if we still have objects in the freelist. */ if (next_stack) goto out_keep_prealloc; @@ -307,7 +305,7 @@ static bool depot_update_pools(void **prealloc) /* Take note that we might need a new new_pool. */ if (pools_num < DEPOT_MAX_POOLS) - smp_store_release(&new_pool_required, 1); + new_pool_required = true; /* Try keeping the preallocated memory for new_pool. 
*/ goto out_keep_prealloc; @@ -341,6 +339,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; + lockdep_assert_held_write(&pool_rwlock); + /* Update current and new pools if required and possible. */ if (!depot_update_pools(prealloc)) return NULL; @@ -376,18 +376,15 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) { union handle_parts parts = { .handle = handle }; - /* - * READ_ONCE() pairs with potential concurrent write in - * depot_init_pool(). - */ - int pools_num_cached = READ_ONCE(pools_num); void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - if (parts.pool_index > pools_num_cached) { + lockdep_assert_held_read(&pool_rwlock); + + if (parts.pool_index > pools_num) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pools_num_cached, handle); + parts.pool_index, pools_num, handle); return NULL; } @@ -429,6 +426,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, { struct stack_record *found; + lockdep_assert_held(&pool_rwlock); + for (found = bucket; found; found = found->next) { if (found->hash == hash && found->size == size && @@ -446,6 +445,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, depot_stack_handle_t handle = 0; struct page *page = NULL; void *prealloc = NULL; + bool need_alloc = false; unsigned long flags; u32 hash; @@ -465,22 +465,26 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & stack_hash_mask]; - /* - * Fast path: look the stack trace up without locking. - * smp_load_acquire() pairs with smp_store_release() to |bucket| below. - */ - found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash); - if (found) + read_lock_irqsave(&pool_rwlock, flags); + + /* Fast path: look the stack trace up without full locking. */ + found = find_stack(*bucket, entries, nr_entries, hash); + if (found) { + read_unlock_irqrestore(&pool_rwlock, flags); goto exit; + } + + /* Take note if another stack pool needs to be allocated. */ + if (new_pool_required) + need_alloc = true; + + read_unlock_irqrestore(&pool_rwlock, flags); /* - * Check if another stack pool needs to be allocated. If so, allocate - * the memory now: we won't be able to do that under the lock. - * - * smp_load_acquire() pairs with smp_store_release() in - * depot_update_pools() and depot_keep_new_pool(). + * Allocate memory for a new pool if required now: + * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && smp_load_acquire(&new_pool_required))) { + if (unlikely(can_alloc && need_alloc)) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -494,7 +498,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, prealloc = page_address(page); } - raw_spin_lock_irqsave(&pool_lock, flags); + write_lock_irqsave(&pool_rwlock, flags); found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { @@ -503,11 +507,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, if (new) { new->next = *bucket; - /* - * smp_store_release() pairs with smp_load_acquire() - * from |bucket| above. 
- */ - smp_store_release(bucket, new); + *bucket = new; found = new; } } else if (prealloc) { @@ -518,7 +518,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, depot_keep_new_pool(&prealloc); } - raw_spin_unlock_irqrestore(&pool_lock, flags); + write_unlock_irqrestore(&pool_rwlock, flags); exit: if (prealloc) { /* Stack depot didn't use this memory, free it. */ @@ -542,6 +542,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { struct stack_record *stack; + unsigned long flags; *entries = NULL; /* @@ -553,8 +554,12 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle || stack_depot_disabled) return 0; + read_lock_irqsave(&pool_rwlock, flags); + stack = depot_fetch_stack(handle); + read_unlock_irqrestore(&pool_rwlock, flags); + *entries = stack->entries; return stack->size; } -- cgit From 4805180bc165238c3d845a992a5962ee87097c15 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:11 +0100 Subject: lib/stackdepot: use list_head for stack record links Switch stack_record to use list_head for links in the hash table and in the freelist. This will allow removing entries from the hash table buckets. This is preparatory patch for implementing the eviction of stack records from the stack depot. Link: https://lkml.kernel.org/r/4787d9a584cd33433d9ee1846b17fa3d3e1987ad.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 87 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 37 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 8378b32b5310..4bb0af423f82 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ union handle_parts { }; struct stack_record { - struct stack_record *next; /* Link in hash table or freelist */ + struct list_head list; /* Links in hash table or freelist */ u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ union handle_parts handle; @@ -77,21 +78,21 @@ static bool __stack_depot_early_init_passed __initdata; /* Initial seed for jhash2. */ #define STACK_HASH_SEED 0x9747b28c -/* Hash table of pointers to stored stack traces. */ -static struct stack_record **stack_table; +/* Hash table of stored stack records. */ +static struct list_head *stack_table; /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ static unsigned int stack_bucket_number_order; /* Hash mask for indexing the table. */ static unsigned int stack_hash_mask; -/* Array of memory regions that store stack traces. */ +/* Array of memory regions that store stack records. */ static void *stack_pools[DEPOT_MAX_POOLS]; /* Newly allocated pool that is not yet added to stack_pools. */ static void *new_pool; /* Number of pools in stack_pools. */ static int pools_num; -/* Next stack in the freelist of stack records within stack_pools. */ -static struct stack_record *next_stack; +/* Freelist of stack records within stack_pools. */ +static LIST_HEAD(free_stacks); /* * Stack depot tries to keep an extra pool allocated even before it runs out * of space in the currently used pool. 
This flag marks whether this extra pool @@ -116,6 +117,15 @@ void __init stack_depot_request_early_init(void) __stack_depot_early_init_requested = true; } +/* Initialize list_head's within the hash table. */ +static void init_stack_table(unsigned long entries) +{ + unsigned long i; + + for (i = 0; i < entries; i++) + INIT_LIST_HEAD(&stack_table[i]); +} + /* Allocates a hash table via memblock. Can only be used during early boot. */ int __init stack_depot_early_init(void) { @@ -152,16 +162,16 @@ int __init stack_depot_early_init(void) /* * If stack_bucket_number_order is not set, leave entries as 0 to rely - * on the automatic calculations performed by alloc_large_system_hash. + * on the automatic calculations performed by alloc_large_system_hash(). */ if (stack_bucket_number_order) entries = 1UL << stack_bucket_number_order; pr_info("allocating hash table via alloc_large_system_hash\n"); stack_table = alloc_large_system_hash("stackdepot", - sizeof(struct stack_record *), + sizeof(struct list_head), entries, STACK_HASH_TABLE_SCALE, - HASH_EARLY | HASH_ZERO, + HASH_EARLY, NULL, &stack_hash_mask, 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, @@ -171,6 +181,14 @@ int __init stack_depot_early_init(void) stack_depot_disabled = true; return -ENOMEM; } + if (!entries) { + /* + * Obtain the number of entries that was calculated by + * alloc_large_system_hash(). + */ + entries = stack_hash_mask + 1; + } + init_stack_table(entries); return 0; } @@ -211,7 +229,7 @@ int stack_depot_init(void) entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); - stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); + stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL); if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); stack_depot_disabled = true; @@ -219,6 +237,7 @@ int stack_depot_init(void) goto out_unlock; } stack_hash_mask = entries - 1; + init_stack_table(entries); out_unlock: mutex_unlock(&stack_depot_init_mutex); @@ -230,31 +249,24 @@ EXPORT_SYMBOL_GPL(stack_depot_init); /* Initializes a stack depol pool. */ static void depot_init_pool(void *pool) { - const int records_in_pool = DEPOT_POOL_SIZE / DEPOT_STACK_RECORD_SIZE; - int i, offset; + int offset; lockdep_assert_held_write(&pool_rwlock); - /* Initialize handles and link stack records to each other. */ - for (i = 0, offset = 0; - offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; - i++, offset += DEPOT_STACK_RECORD_SIZE) { + WARN_ON(!list_empty(&free_stacks)); + + /* Initialize handles and link stack records into the freelist. */ + for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; + offset += DEPOT_STACK_RECORD_SIZE) { struct stack_record *stack = pool + offset; stack->handle.pool_index = pools_num; stack->handle.offset = offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; - if (i < records_in_pool - 1) - stack->next = (void *)stack + DEPOT_STACK_RECORD_SIZE; - else - stack->next = NULL; + list_add(&stack->list, &free_stacks); } - /* Link stack records into the freelist. */ - WARN_ON(next_stack); - next_stack = pool; - /* Save reference to the pool to be used by depot_fetch_stack(). */ stack_pools[pools_num] = pool; pools_num++; @@ -295,7 +307,7 @@ static bool depot_update_pools(void **prealloc) lockdep_assert_held_write(&pool_rwlock); /* Check if we still have objects in the freelist. */ - if (next_stack) + if (!list_empty(&free_stacks)) goto out_keep_prealloc; /* Check if we have a new pool saved and use it. 
*/ @@ -346,19 +358,18 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return NULL; /* Check if we have a stack record to save the stack trace. */ - stack = next_stack; - if (!stack) + if (list_empty(&free_stacks)) return NULL; - /* Advance the freelist. */ - next_stack = stack->next; + /* Get and unlink the first entry from the freelist. */ + stack = list_first_entry(&free_stacks, struct stack_record, list); + list_del(&stack->list); /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) size = CONFIG_STACKDEPOT_MAX_FRAMES; /* Save the stack trace. */ - stack->next = NULL; stack->hash = hash; stack->size = size; /* stack->handle is already filled in by depot_init_pool(). */ @@ -420,15 +431,17 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, } /* Finds a stack in a bucket of the hash table. */ -static inline struct stack_record *find_stack(struct stack_record *bucket, +static inline struct stack_record *find_stack(struct list_head *bucket, unsigned long *entries, int size, u32 hash) { + struct list_head *pos; struct stack_record *found; lockdep_assert_held(&pool_rwlock); - for (found = bucket; found; found = found->next) { + list_for_each(pos, bucket) { + found = list_entry(pos, struct stack_record, list); if (found->hash == hash && found->size == size && !stackdepot_memcmp(entries, found->entries, size)) @@ -441,7 +454,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags, bool can_alloc) { - struct stack_record *found = NULL, **bucket; + struct list_head *bucket; + struct stack_record *found = NULL; depot_stack_handle_t handle = 0; struct page *page = NULL; void *prealloc = NULL; @@ -468,7 +482,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, read_lock_irqsave(&pool_rwlock, flags); /* Fast path: look the stack trace up without full locking. */ - found = find_stack(*bucket, entries, nr_entries, hash); + found = find_stack(bucket, entries, nr_entries, hash); if (found) { read_unlock_irqrestore(&pool_rwlock, flags); goto exit; @@ -500,14 +514,13 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, write_lock_irqsave(&pool_rwlock, flags); - found = find_stack(*bucket, entries, nr_entries, hash); + found = find_stack(bucket, entries, nr_entries, hash); if (!found) { struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { - new->next = *bucket; - *bucket = new; + list_add(&new->list, bucket); found = new; } } else if (prealloc) { -- cgit From 022012dcf44209074af97b6ae531a10c08736b31 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:13 +0100 Subject: lib/stackdepot, kasan: add flags to __stack_depot_save and rename Change the bool can_alloc argument of __stack_depot_save to a u32 argument that accepts a set of flags. The following patch will add another flag to stack_depot_save_flags besides the existing STACK_DEPOT_FLAG_CAN_ALLOC. 
Also rename the function to stack_depot_save_flags, as __stack_depot_save is a cryptic name, Link: https://lkml.kernel.org/r/645fa15239621eebbd3a10331e5864b718839512.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4bb0af423f82..59d61d5c09a7 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -450,19 +450,24 @@ static inline struct stack_record *find_stack(struct list_head *bucket, return NULL; } -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t alloc_flags, bool can_alloc) +depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags, + depot_flags_t depot_flags) { struct list_head *bucket; struct stack_record *found = NULL; depot_stack_handle_t handle = 0; struct page *page = NULL; void *prealloc = NULL; + bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; bool need_alloc = false; unsigned long flags; u32 hash; + if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK)) + return 0; + /* * If this stack trace is from an interrupt, including anything before * interrupt entry usually leads to unbounded stack depot growth. @@ -541,13 +546,14 @@ exit: handle = found->handle.handle; return handle; } -EXPORT_SYMBOL_GPL(__stack_depot_save); +EXPORT_SYMBOL_GPL(stack_depot_save_flags); depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags) { - return __stack_depot_save(entries, nr_entries, alloc_flags, true); + return stack_depot_save_flags(entries, nr_entries, alloc_flags, + STACK_DEPOT_FLAG_CAN_ALLOC); } EXPORT_SYMBOL_GPL(stack_depot_save); -- cgit From 410b764f89f59cce858d94fc781b68c1f27a0ca9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:14 +0100 Subject: lib/stackdepot: add refcount for records Add a reference counter for how many times a stack records has been added to stack depot. Add a new STACK_DEPOT_FLAG_GET flag to stack_depot_save_flags that instructs the stack depot to increment the refcount. Do not yet decrement the refcount; this is implemented in one of the following patches. Do not yet enable any users to use the flag to avoid overflowing the refcount. This is preparatory patch for implementing the eviction of stack records from the stack depot. 
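A minimal usage sketch, not part of this patch: once users are allowed to pass the new flag, the intended calling convention for stack_depot_save_flags() looks roughly as follows. The helper name and the frame-buffer depth are made up for illustration, and stack_trace_save() is assumed as the usual way of collecting the frames.

#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/* Illustrative helper: save the current stack and pin the record. */
static depot_stack_handle_t save_pinned_stack(gfp_t gfp)
{
	unsigned long entries[16];	/* illustrative depth */
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

	/*
	 * CAN_ALLOC lets the depot allocate a new pool if the freelist is
	 * empty; GET additionally bumps the record's refcount so the caller
	 * can drop it later via stack_depot_put() (added later in the series).
	 */
	return stack_depot_save_flags(entries, nr_entries, gfp,
				      STACK_DEPOT_FLAG_CAN_ALLOC |
				      STACK_DEPOT_FLAG_GET);
}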
Link: https://lkml.kernel.org/r/a3fc14a2359d019d2a008d4ff8b46a665371ffee.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 59d61d5c09a7..911dee11bf39 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ struct stack_record { u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ union handle_parts handle; + refcount_t count; unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ }; @@ -373,6 +375,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->hash = hash; stack->size = size; /* stack->handle is already filled in by depot_init_pool(). */ + refcount_set(&stack->count, 1); memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); /* @@ -489,6 +492,8 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, /* Fast path: look the stack trace up without full locking. */ found = find_stack(bucket, entries, nr_entries, hash); if (found) { + if (depot_flags & STACK_DEPOT_FLAG_GET) + refcount_inc(&found->count); read_unlock_irqrestore(&pool_rwlock, flags); goto exit; } @@ -528,12 +533,15 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, list_add(&new->list, bucket); found = new; } - } else if (prealloc) { + } else { + if (depot_flags & STACK_DEPOT_FLAG_GET) + refcount_inc(&found->count); /* * Stack depot already contains this stack trace, but let's * keep the preallocated memory for future. */ - depot_keep_new_pool(&prealloc); + if (prealloc) + depot_keep_new_pool(&prealloc); } write_unlock_irqrestore(&pool_rwlock, flags); -- cgit From 108be8def46e9422f5a5abc96b0ab8fb6b3fb344 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:15 +0100 Subject: lib/stackdepot: allow users to evict stack traces Add stack_depot_put, a function that decrements the reference counter on a stack record and removes it from the stack depot once the counter reaches 0. Internally, when removing a stack record, the function unlinks it from the hash table bucket and returns to the freelist. With this change, the users of stack depot can call stack_depot_put when keeping a stack trace in the stack depot is not needed anymore. This allows avoiding polluting the stack depot with irrelevant stack traces and thus have more space to store the relevant ones before the stack depot reaches its capacity. 
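A minimal sketch of the intended get/put pairing; the structure and helper below are hypothetical and only illustrate the API: a handle obtained with STACK_DEPOT_FLAG_GET is released once the owning object no longer needs the trace.

#include <linux/stackdepot.h>

/* Hypothetical object that keeps a pinned allocation stack trace. */
struct tracked_object {
	depot_stack_handle_t alloc_handle;	/* saved with STACK_DEPOT_FLAG_GET */
};

static void tracked_object_destroy(struct tracked_object *obj)
{
	/*
	 * Drops one reference; when the refcount reaches zero, the record is
	 * unlinked from its hash bucket and returned to the freelist.
	 */
	stack_depot_put(obj->alloc_handle);
	obj->alloc_handle = 0;
}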
Link: https://lkml.kernel.org/r/1d1ad5692ee43d4fc2b3fd9d221331d30b36123f.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 911dee11bf39..c1b31160f4b4 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -394,7 +394,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - lockdep_assert_held_read(&pool_rwlock); + lockdep_assert_held(&pool_rwlock); if (parts.pool_index > pools_num) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", @@ -410,6 +410,14 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) return stack; } +/* Links stack into the freelist. */ +static void depot_free_stack(struct stack_record *stack) +{ + lockdep_assert_held_write(&pool_rwlock); + + list_add(&stack->list, &free_stacks); +} + /* Calculates the hash for a stack. */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) { @@ -592,6 +600,33 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, } EXPORT_SYMBOL_GPL(stack_depot_fetch); +void stack_depot_put(depot_stack_handle_t handle) +{ + struct stack_record *stack; + unsigned long flags; + + if (!handle || stack_depot_disabled) + return; + + write_lock_irqsave(&pool_rwlock, flags); + + stack = depot_fetch_stack(handle); + if (WARN_ON(!stack)) + goto out; + + if (refcount_dec_and_test(&stack->count)) { + /* Unlink stack from the hash table. */ + list_del(&stack->list); + + /* Free stack. */ + depot_free_stack(stack); + } + +out: + write_unlock_irqrestore(&pool_rwlock, flags); +} +EXPORT_SYMBOL_GPL(stack_depot_put); + void stack_depot_print(depot_stack_handle_t stack) { unsigned long *entries; -- cgit From bd9d9624b7136b69d892597b6a8cc482341e415a Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Nov 2023 18:47:20 +0100 Subject: lib/stackdepot: adjust DEPOT_POOLS_CAP for KMSAN KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack traces. As KMSAN does not support evicting stack traces from the stack depot, the stack depot capacity might be reached quickly with large stack records. Adjust the maximum number of stack depot pools for this case. The average size of a stack trace saved into the stack depot is ~16 frames. Thus, adjust the maximum pools number accordingly to keep the maximum number of stack traces that can be saved into the stack depot similar to the one that was allowed before the stack trace eviction changes. 
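To make the new cap concrete: with the default CONFIG_STACKDEPOT_MAX_FRAMES of 64 and the ~16-frame average quoted above, the KMSAN case evaluates to DEPOT_POOLS_CAP = 8192 * (64 / 16) = 32768 pools, four times the generic cap of 8192. Since each fixed-size stack record is now roughly four times larger than an average precisely-sized one, the number of stack traces that fit into the depot stays about the same as before the eviction changes.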
Link: https://lkml.kernel.org/r/301a115cf7ce8ddb42ef6de9151c2bb76ba728fc.1700502145.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Oscar Salvador Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index c1b31160f4b4..870cce2f4cbd 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -41,7 +41,17 @@ #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ STACK_DEPOT_EXTRA_BITS) +#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32 +/* + * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack + * traces. As KMSAN does not support evicting stack traces from the stack + * depot, the stack depot capacity might be reached quickly with large stack + * records. Adjust the maximum number of stack depot pools for this case. + */ +#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16)) +#else #define DEPOT_POOLS_CAP 8192 +#endif #define DEPOT_MAX_POOLS \ (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) -- cgit From a914d8d6cf204287aa2dfb9235d29d7944ad72a1 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 19 Dec 2023 22:19:50 +0100 Subject: lib/stackdepot: add printk_deferred_enter/exit guards Patch series "lib/stackdepot, kasan: fixes for stack eviction series", v3. A few fixes for the stack depot eviction series ("stackdepot: allow evicting stack traces"). This patch (of 5): Stack depot functions can be called from various contexts that do allocations, including with console locks taken. At the same time, stack depot functions might print WARNING's or refcount-related failures. This can cause a deadlock on console locks. Add printk_deferred_enter/exit guards to stack depot to avoid this. Link: https://lkml.kernel.org/r/cover.1703020707.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/82092f9040d075a161d1264377d51e0bac847e8a.1703020707.git.andreyknvl@google.com Fixes: 108be8def46e ("lib/stackdepot: allow users to evict stack traces") Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Andrey Konovalov Reported-by: Tetsuo Handa Closes: https://lore.kernel.org/all/000000000000f56750060b9ad216@google.com/ Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/stackdepot.c') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 870cce2f4cbd..a0be5d05c7f0 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -506,12 +506,14 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, bucket = &stack_table[hash & stack_hash_mask]; read_lock_irqsave(&pool_rwlock, flags); + printk_deferred_enter(); /* Fast path: look the stack trace up without full locking. 
*/ found = find_stack(bucket, entries, nr_entries, hash); if (found) { if (depot_flags & STACK_DEPOT_FLAG_GET) refcount_inc(&found->count); + printk_deferred_exit(); read_unlock_irqrestore(&pool_rwlock, flags); goto exit; } @@ -520,6 +522,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, if (new_pool_required) need_alloc = true; + printk_deferred_exit(); read_unlock_irqrestore(&pool_rwlock, flags); /* @@ -541,6 +544,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, } write_lock_irqsave(&pool_rwlock, flags); + printk_deferred_enter(); found = find_stack(bucket, entries, nr_entries, hash); if (!found) { @@ -562,6 +566,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, depot_keep_new_pool(&prealloc); } + printk_deferred_exit(); write_unlock_irqrestore(&pool_rwlock, flags); exit: if (prealloc) { @@ -600,9 +605,11 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, return 0; read_lock_irqsave(&pool_rwlock, flags); + printk_deferred_enter(); stack = depot_fetch_stack(handle); + printk_deferred_exit(); read_unlock_irqrestore(&pool_rwlock, flags); *entries = stack->entries; @@ -619,6 +626,7 @@ void stack_depot_put(depot_stack_handle_t handle) return; write_lock_irqsave(&pool_rwlock, flags); + printk_deferred_enter(); stack = depot_fetch_stack(handle); if (WARN_ON(!stack)) @@ -633,6 +641,7 @@ void stack_depot_put(depot_stack_handle_t handle) } out: + printk_deferred_exit(); write_unlock_irqrestore(&pool_rwlock, flags); } EXPORT_SYMBOL_GPL(stack_depot_put); -- cgit
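A minimal sketch of the guard pattern applied in the hunks above; the function and the local lock definition are illustrative stand-ins, not real stack depot code. Any section that holds the depot lock and may WARN() or report a refcount problem defers console output, so printk cannot take console locks and deadlock while the depot lock is held.

#include <linux/printk.h>
#include <linux/spinlock.h>

/* Stand-in for the depot's pool_rwlock, so the sketch is self-contained. */
static DEFINE_RWLOCK(pool_rwlock);

static void locked_section_example(void)
{
	unsigned long flags;

	write_lock_irqsave(&pool_rwlock, flags);
	printk_deferred_enter();

	/* ... work that may WARN() or print, e.g. a failed handle lookup ... */

	printk_deferred_exit();
	write_unlock_irqrestore(&pool_rwlock, flags);
}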