Diffstat (limited to 'mm/kfence')
-rw-r--r--  mm/kfence/core.c        | 74
-rw-r--r--  mm/kfence/kfence.h      |  3
-rw-r--r--  mm/kfence/kfence_test.c | 21
-rw-r--r--  mm/kfence/report.c      | 18
4 files changed, 88 insertions, 28 deletions
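Most of the change lands in mm/kfence/core.c, where the new `burst` module parameter (exposed as kfence.burst) alters the allocation-gate arithmetic: toggle_allocation_gate() now resets the gate to -kfence_burst instead of 0, so burst + 1 allocation attempts pass the gate per sample interval before __kfence_alloc() starts bailing out. A minimal sketch of that counting, written as plain userspace C with illustrative names only (not kernel code):

#include <stdio.h>

/* Userspace stand-in for the kernel's atomic allocation gate. */
static int allocation_gate;

static int gate_inc_return(void)
{
	return ++allocation_gate;	/* models atomic_inc_return() */
}

int main(void)
{
	const int burst = 3;	/* hypothetical kfence.burst value */
	int admitted = 0;

	/* toggle_allocation_gate(): atomic_set(&kfence_allocation_gate, -kfence_burst) */
	allocation_gate = -burst;

	/* __kfence_alloc(): an attempt proceeds only while the post-increment gate is <= 1. */
	for (int attempt = 0; attempt < 10; attempt++) {
		if (gate_inc_return() <= 1)
			admitted++;
	}

	/* With burst == 3, burst + 1 == 4 attempts pass the gate per sample. */
	printf("admitted = %d\n", admitted);
	return 0;
}

With kfence.burst left at its default of 0 the behaviour is unchanged: a single allocation passes the gate per sample interval.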
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 8350f5c06f2e..102048821c22 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -21,6 +21,7 @@
 #include <linux/log2.h>
 #include <linux/memblock.h>
 #include <linux/moduleparam.h>
+#include <linux/nodemask.h>
 #include <linux/notifier.h>
 #include <linux/panic_notifier.h>
 #include <linux/random.h>
@@ -99,6 +100,10 @@ module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_inte
 static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
 module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
 
+/* Allocation burst count: number of excess KFENCE allocations per sample. */
+static unsigned int kfence_burst __read_mostly;
+module_param_named(burst, kfence_burst, uint, 0644);
+
 /* If true, use a deferrable timer. */
 static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
 module_param_named(deferrable, kfence_deferrable, bool, 0444);
@@ -269,6 +274,13 @@ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *m
 	return pageaddr;
 }
 
+static inline bool kfence_obj_allocated(const struct kfence_metadata *meta)
+{
+	enum kfence_object_state state = READ_ONCE(meta->state);
+
+	return state == KFENCE_OBJECT_ALLOCATED || state == KFENCE_OBJECT_RCU_FREEING;
+}
+
 /*
  * Update the object's metadata state, including updating the alloc/free stacks
  * depending on the state transition.
@@ -278,10 +290,14 @@ metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state nex
 		      unsigned long *stack_entries, size_t num_stack_entries)
 {
 	struct kfence_track *track =
-		next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
+		next == KFENCE_OBJECT_ALLOCATED ? &meta->alloc_track : &meta->free_track;
 
 	lockdep_assert_held(&meta->lock);
 
+	/* Stack has been saved when calling rcu, skip. */
+	if (READ_ONCE(meta->state) == KFENCE_OBJECT_RCU_FREEING)
+		goto out;
+
 	if (stack_entries) {
 		memcpy(track->stack_entries, stack_entries,
 		       num_stack_entries * sizeof(stack_entries[0]));
@@ -297,6 +313,7 @@ metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state nex
 	track->cpu = raw_smp_processor_id();
 	track->ts_nsec = local_clock(); /* Same source as printk timestamps. */
 
+out:
 	/*
 	 * Pairs with READ_ONCE() in
 	 *	kfence_shutdown_cache(),
@@ -305,8 +322,14 @@ metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state nex
 	WRITE_ONCE(meta->state, next);
 }
 
+#ifdef CONFIG_KMSAN
+#define check_canary_attributes noinline __no_kmsan_checks
+#else
+#define check_canary_attributes inline
+#endif
+
 /* Check canary byte at @addr. */
-static inline bool check_canary_byte(u8 *addr)
+static check_canary_attributes bool check_canary_byte(u8 *addr)
 {
 	struct kfence_metadata *meta;
 	unsigned long flags;
@@ -341,7 +364,8 @@ static inline void set_canary(const struct kfence_metadata *meta)
 		*((u64 *)addr) = KFENCE_CANARY_PATTERN_U64;
 }
 
-static inline void check_canary(const struct kfence_metadata *meta)
+static check_canary_attributes void
+check_canary(const struct kfence_metadata *meta)
 {
 	const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
 	unsigned long addr = pageaddr;
@@ -495,7 +519,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
 
 	raw_spin_lock_irqsave(&meta->lock, flags);
 
-	if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
+	if (!kfence_obj_allocated(meta) || meta->addr != (unsigned long)addr) {
 		/* Invalid or double-free, bail out. */
 		atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
 		kfence_report_error((unsigned long)addr, false, NULL, meta,
@@ -596,8 +620,8 @@ static unsigned long kfence_init_pool(void)
 
 		__folio_set_slab(slab_folio(slab));
 #ifdef CONFIG_MEMCG
-		slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
-				   MEMCG_DATA_OBJCGS;
+		slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts |
+				 MEMCG_DATA_OBJEXTS;
 #endif
 	}
 
@@ -646,7 +670,7 @@ reset_slab:
 		if (!i || (i % 2))
 			continue;
 #ifdef CONFIG_MEMCG
-		slab->memcg_data = 0;
+		slab->obj_exts = 0;
 #endif
 		__folio_clear_slab(slab_folio(slab));
 	}
@@ -773,7 +797,7 @@ static void kfence_check_all_canary(void)
 	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
 		struct kfence_metadata *meta = &kfence_metadata[i];
 
-		if (meta->state == KFENCE_OBJECT_ALLOCATED)
+		if (kfence_obj_allocated(meta))
 			check_canary(meta);
 	}
 }
@@ -820,12 +844,12 @@ static void toggle_allocation_gate(struct work_struct *work)
 	if (!READ_ONCE(kfence_enabled))
 		return;
 
-	atomic_set(&kfence_allocation_gate, 0);
+	atomic_set(&kfence_allocation_gate, -kfence_burst);
 #ifdef CONFIG_KFENCE_STATIC_KEYS
 	/* Enable static key, and await allocation to happen. */
 	static_branch_enable(&kfence_allocation_key);
 
-	wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
+	wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate) > 0);
 
 	/* Disable static key and reset timer. */
 	static_branch_disable(&kfence_allocation_key);
@@ -999,12 +1023,11 @@ void kfence_shutdown_cache(struct kmem_cache *s)
 		 * the lock will not help, as different critical section
 		 * serialization will have the same outcome.
 		 */
-		if (READ_ONCE(meta->cache) != s ||
-		    READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
+		if (READ_ONCE(meta->cache) != s || !kfence_obj_allocated(meta))
 			continue;
 
 		raw_spin_lock_irqsave(&meta->lock, flags);
-		in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
+		in_use = meta->cache == s && kfence_obj_allocated(meta);
 		raw_spin_unlock_irqrestore(&meta->lock, flags);
 
 		if (in_use) {
@@ -1045,6 +1068,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 	unsigned long stack_entries[KFENCE_STACK_DEPTH];
 	size_t num_stack_entries;
 	u32 alloc_stack_hash;
+	int allocation_gate;
 
 	/*
 	 * Perform size check before switching kfence_allocation_gate, so that
@@ -1061,6 +1085,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 	 * properties (e.g. reside in DMAable memory).
 	 */
 	if ((flags & GFP_ZONEMASK) ||
+	    ((flags & __GFP_THISNODE) && num_online_nodes() > 1) ||
 	    (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
 		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
 		return NULL;
 	}
@@ -1073,14 +1098,15 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 	if (s->flags & SLAB_SKIP_KFENCE)
 		return NULL;
 
-	if (atomic_inc_return(&kfence_allocation_gate) > 1)
+	allocation_gate = atomic_inc_return(&kfence_allocation_gate);
+	if (allocation_gate > 1)
 		return NULL;
 #ifdef CONFIG_KFENCE_STATIC_KEYS
 	/*
 	 * waitqueue_active() is fully ordered after the update of
 	 * kfence_allocation_gate per atomic_inc_return().
 	 */
-	if (waitqueue_active(&allocation_wait)) {
+	if (allocation_gate == 1 && waitqueue_active(&allocation_wait)) {
 		/*
 		 * Calling wake_up() here may deadlock when allocations happen
 		 * from within timer code. Use an irq_work to defer it.
@@ -1140,18 +1166,26 @@ void __kfence_free(void *addr)
 	struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
 
 #ifdef CONFIG_MEMCG
-	KFENCE_WARN_ON(meta->objcg);
+	KFENCE_WARN_ON(meta->obj_exts.objcg);
 #endif
 	/*
 	 * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
 	 * the object, as the object page may be recycled for other-typed
 	 * objects once it has been freed. meta->cache may be NULL if the cache
 	 * was destroyed.
+	 * Save the stack trace here so that reports show where the user freed
+	 * the object.
 	 */
-	if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
+	if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU))) {
+		unsigned long flags;
+
+		raw_spin_lock_irqsave(&meta->lock, flags);
+		metadata_update_state(meta, KFENCE_OBJECT_RCU_FREEING, NULL, 0);
+		raw_spin_unlock_irqrestore(&meta->lock, flags);
 		call_rcu(&meta->rcu_head, rcu_guarded_free);
-	else
+	} else {
 		kfence_guarded_free(addr, meta, false);
+	}
 }
 
 bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
@@ -1175,14 +1209,14 @@
 		int distance = 0;
 
 		meta = addr_to_metadata(addr - PAGE_SIZE);
-		if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+		if (meta && kfence_obj_allocated(meta)) {
 			to_report = meta;
 			/* Data race ok; distance calculation approximate. */
 			distance = addr - data_race(meta->addr + meta->size);
 		}
 
 		meta = addr_to_metadata(addr + PAGE_SIZE);
-		if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+		if (meta && kfence_obj_allocated(meta)) {
 			/* Data race ok; distance calculation approximate. */
 			if (!to_report || distance > data_race(meta->addr) - addr)
 				to_report = meta;
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
index f46fbb03062b..dfba5ea06b01 100644
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -38,6 +38,7 @@
 enum kfence_object_state {
 	KFENCE_OBJECT_UNUSED,		/* Object is unused. */
 	KFENCE_OBJECT_ALLOCATED,	/* Object is currently allocated. */
+	KFENCE_OBJECT_RCU_FREEING,	/* Object was allocated, and then being freed by rcu. */
 	KFENCE_OBJECT_FREED,		/* Object was allocated, and then freed. */
 };
 
@@ -98,7 +99,7 @@ struct kfence_metadata {
 	/* For updating alloc_covered on frees. */
 	u32 alloc_stack_hash;
 #ifdef CONFIG_MEMCG
-	struct obj_cgroup *objcg;
+	struct slabobj_ext obj_exts;
 #endif
 };
 
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 95b2b84c296d..00034e37bc9f 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/string_choices.h>
 #include <linux/tracepoint.h>
 #include <trace/events/printk.h>
 
@@ -88,7 +89,7 @@
 
 static const char *get_access_type(const struct expect_report *r)
 {
-	return r->is_write ? "write" : "read";
+	return str_write_read(r->is_write);
 }
 
 /* Check observed report matches information in @r. */
@@ -383,6 +384,22 @@ static void test_use_after_free_read(struct kunit *test)
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
 }
 
+static void test_use_after_free_read_nofault(struct kunit *test)
+{
+	const size_t size = 32;
+	char *addr;
+	char dst;
+	int ret;
+
+	setup_test_cache(test, size, 0, NULL);
+	addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+	test_free(addr);
+	/* Use after free with *_nofault() */
+	ret = copy_from_kernel_nofault(&dst, addr, 1);
+	KUNIT_EXPECT_EQ(test, ret, -EFAULT);
+	KUNIT_EXPECT_FALSE(test, report_available());
+}
+
 static void test_double_free(struct kunit *test)
 {
 	const size_t size = 32;
@@ -780,6 +797,7 @@ static struct kunit_case kfence_test_cases[] = {
 	KFENCE_KUNIT_CASE(test_out_of_bounds_read),
 	KFENCE_KUNIT_CASE(test_out_of_bounds_write),
 	KFENCE_KUNIT_CASE(test_use_after_free_read),
+	KFENCE_KUNIT_CASE(test_use_after_free_read_nofault),
 	KFENCE_KUNIT_CASE(test_double_free),
 	KFENCE_KUNIT_CASE(test_invalid_addr_free),
 	KFENCE_KUNIT_CASE(test_corruption),
@@ -852,3 +870,4 @@ kunit_test_suites(&kfence_test_suite);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Alexander Potapenko <glider@google.com>, Marco Elver <elver@google.com>");
+MODULE_DESCRIPTION("kfence unit test suite");
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index c509aed326ce..10e6802a2edf 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -16,6 +16,8 @@
 #include <linux/sprintf.h>
 #include <linux/stacktrace.h>
 #include <linux/string.h>
+#include <linux/string_choices.h>
+#include <linux/sched/clock.h>
 #include <trace/events/error_report.h>
 
 #include <asm/kfence.h>
@@ -108,11 +110,15 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat
 	const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
 	u64 ts_sec = track->ts_nsec;
 	unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC);
+	u64 interval_nsec = local_clock() - track->ts_nsec;
+	unsigned long rem_interval_nsec = do_div(interval_nsec, NSEC_PER_SEC);
 
 	/* Timestamp matches printk timestamp format. */
-	seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus:\n",
-		       show_alloc ? "allocated" : "freed", track->pid,
-		       track->cpu, (unsigned long)ts_sec, rem_nsec / 1000);
+	seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus (%lu.%06lus ago):\n",
+		       show_alloc ? "allocated" : meta->state == KFENCE_OBJECT_RCU_FREEING ?
+		       "rcu freeing" : "freed", track->pid,
+		       track->cpu, (unsigned long)ts_sec, rem_nsec / 1000,
+		       (unsigned long)interval_nsec, rem_interval_nsec / 1000);
 
 	if (track->num_stack_entries) {
 		/* Skip allocation/free internals stack. */
@@ -145,7 +151,7 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
 
 	kfence_print_stack(seq, meta, true);
 
-	if (meta->state == KFENCE_OBJECT_FREED) {
+	if (meta->state == KFENCE_OBJECT_FREED || meta->state == KFENCE_OBJECT_RCU_FREEING) {
 		seq_con_printf(seq, "\n");
 		kfence_print_stack(seq, meta, false);
 	}
@@ -179,7 +185,7 @@ static void print_diff_canary(unsigned long address, size_t bytes_to_show,
 
 static const char *get_access_type(bool is_write)
 {
-	return is_write ? "write" : "read";
"write" : "read"; + return str_write_read(is_write); } void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs, @@ -314,7 +320,7 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla kpp->kp_slab_cache = meta->cache; kpp->kp_objp = (void *)meta->addr; kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack); - if (meta->state == KFENCE_OBJECT_FREED) + if (meta->state == KFENCE_OBJECT_FREED || meta->state == KFENCE_OBJECT_RCU_FREEING) kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack); /* get_stack_skipnr() ensures the first entry is outside allocator. */ kpp->kp_ret = kpp->kp_stack[0]; |