Diffstat (limited to 'include/linux/slab.h')
| -rw-r--r-- | include/linux/slab.h | 1247 |
1 file changed, 928 insertions, 319 deletions
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 6c5cc0ea8713..cf443f064a66 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -1,32 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk).
  *
  * (C) SGI 2006, Christoph Lameter
  *	Cleaned up and restructured to ease the addition of alternative
  *	implementations of SLAB allocators.
+ * (C) Linux Foundation 2008-2013
+ *	Unified interface for all slab allocators
  */

 #ifndef _LINUX_SLAB_H
 #define _LINUX_SLAB_H

+#include <linux/cache.h>
 #include <linux/gfp.h>
+#include <linux/overflow.h>
 #include <linux/types.h>
+#include <linux/rcupdate.h>
 #include <linux/workqueue.h>
+#include <linux/percpu-refcount.h>
+#include <linux/cleanup.h>
+#include <linux/hash.h>
+
+enum _slab_flag_bits {
+	_SLAB_CONSISTENCY_CHECKS,
+	_SLAB_RED_ZONE,
+	_SLAB_POISON,
+	_SLAB_KMALLOC,
+	_SLAB_HWCACHE_ALIGN,
+	_SLAB_CACHE_DMA,
+	_SLAB_CACHE_DMA32,
+	_SLAB_STORE_USER,
+	_SLAB_PANIC,
+	_SLAB_TYPESAFE_BY_RCU,
+	_SLAB_TRACE,
+#ifdef CONFIG_DEBUG_OBJECTS
+	_SLAB_DEBUG_OBJECTS,
+#endif
+	_SLAB_NOLEAKTRACE,
+	_SLAB_NO_MERGE,
+#ifdef CONFIG_FAILSLAB
+	_SLAB_FAILSLAB,
+#endif
+#ifdef CONFIG_MEMCG
+	_SLAB_ACCOUNT,
+#endif
+#ifdef CONFIG_KASAN_GENERIC
+	_SLAB_KASAN,
+#endif
+	_SLAB_NO_USER_FLAGS,
+#ifdef CONFIG_KFENCE
+	_SLAB_SKIP_KFENCE,
+#endif
+#ifndef CONFIG_SLUB_TINY
+	_SLAB_RECLAIM_ACCOUNT,
+#endif
+	_SLAB_OBJECT_POISON,
+	_SLAB_CMPXCHG_DOUBLE,
+#ifdef CONFIG_SLAB_OBJ_EXT
+	_SLAB_NO_OBJ_EXT,
+#endif
+	_SLAB_FLAGS_LAST_BIT
+};
+
+#define __SLAB_FLAG_BIT(nr)	((slab_flags_t __force)(1U << (nr)))
+#define __SLAB_FLAG_UNUSED	((slab_flags_t __force)(0U))

 /*
  * Flags to pass to kmem_cache_create().
- * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set.
- */
-#define SLAB_DEBUG_FREE		0x00000100UL	/* DEBUG: Perform (expensive) checks on free */
-#define SLAB_RED_ZONE		0x00000400UL	/* DEBUG: Red zone objs in a cache */
-#define SLAB_POISON		0x00000800UL	/* DEBUG: Poison objects */
-#define SLAB_HWCACHE_ALIGN	0x00002000UL	/* Align objs on cache lines */
-#define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
-#define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
-#define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
-/*
- * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS!
+ * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise they are
+ * no-ops.
+ */
+/* DEBUG: Perform (expensive) checks on alloc/free */
+#define SLAB_CONSISTENCY_CHECKS	__SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS)
+/* DEBUG: Red zone objs in a cache */
+#define SLAB_RED_ZONE		__SLAB_FLAG_BIT(_SLAB_RED_ZONE)
+/* DEBUG: Poison objects */
+#define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
+/* Indicate a kmalloc slab */
+#define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
+/**
+ * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * Sufficiently large objects are aligned on cache line boundary. For object
+ * size smaller than a half of cache line size, the alignment is on the half of
+ * cache line size. In general, if object size is smaller than 1/2^n of cache
+ * line size, the alignment is adjusted to 1/2^n.
+ *
+ * If explicit alignment is also requested by the respective
+ * &struct kmem_cache_args field, the greater of the two alignments is applied.
+ */
+#define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
+/* Use GFP_DMA memory */
+#define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
+/* Use GFP_DMA32 memory */
+#define SLAB_CACHE_DMA32	__SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
+/* DEBUG: Store the last owner for bug hunting */
+#define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
+/* Panic if kmem_cache_create() fails */
+#define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
+/**
+ * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -37,50 +110,140 @@
  * stays valid, the trick to using this is relying on an independent
  * object validation pass. Something like:
  *
- *  rcu_read_lock()
- * again:
- *  obj = lockless_lookup(key);
- *  if (obj) {
- *    if (!try_get_ref(obj)) // might fail for free objects
- *      goto again;
- *
- *    if (obj->key != key) { // not the object we expected
- *      put_ref(obj);
- *      goto again;
- *    }
- *  }
+ * ::
+ *
+ *  begin:
+ *   rcu_read_lock();
+ *   obj = lockless_lookup(key);
+ *   if (obj) {
+ *     if (!try_get_ref(obj)) { // might fail for free objects
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *     if (obj->key != key) { // not the object we expected
+ *       put_ref(obj);
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *   }
  *  rcu_read_unlock();
  *
- * See also the comment on struct slab_rcu in mm/slab.c.
+ * This is useful if we need to approach a kernel structure obliquely,
+ * from its address obtained without the usual locking. We can lock
+ * the structure to stabilize it and check it's still at the given address,
+ * only if we can be sure that the memory has not been meanwhile reused
+ * for some other kind of object (which our subsystem's lock might corrupt).
+ *
+ * Take rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
+ *
+ * Note that the object identity check has to be done *after* acquiring a
+ * reference, therefore the user has to ensure proper ordering for loads.
+ * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU,
+ * the newly allocated object has to be fully initialized *before* its
+ * refcount gets initialized, and proper ordering for stores is required.
+ * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are
+ * designed with the proper fences required for reference counting objects
+ * allocated with SLAB_TYPESAFE_BY_RCU.
+ *
+ * Note that it is not possible to acquire a lock within a structure
+ * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
+ * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
+ * are not zeroed before being given to the slab, which means that any
+ * locks must be initialized after each and every kmem_cache_alloc().
+ * Alternatively, make the ctor passed to kmem_cache_create() initialize
+ * the locks at page-allocation time, as is done in __i915_request_ctor(),
+ * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
+ * to safely acquire those ctor-initialized locks under rcu_read_lock()
+ * protection.
+ *
+ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-#define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
-#define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
-#define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
+#define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
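
To make the ctor-initialized-lock variant described above concrete, here is a minimal sketch of a SLAB_TYPESAFE_BY_RCU user. The names struct foo, foo_cache, foo_lookup(), item_lookup() and foo_put() are hypothetical, invented for illustration; the flow follows the validation pattern from the comment, using the acquire-flavored refcount helper it names.

    /* Hypothetical SLAB_TYPESAFE_BY_RCU user; not part of this header. */
    struct foo {
            spinlock_t lock;        /* ctor-initialized: stays valid across reuse */
            refcount_t ref;
            unsigned long key;
    };

    static struct kmem_cache *foo_cache;

    static void foo_ctor(void *obj)
    {
            struct foo *f = obj;

            /* Runs once per object slot when a slab page is allocated, so
             * readers may take f->lock under rcu_read_lock() even if the
             * object is concurrently freed and recycled. */
            spin_lock_init(&f->lock);
    }

    static int __init foo_init(void)
    {
            foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
                                          __alignof__(struct foo),
                                          SLAB_TYPESAFE_BY_RCU, foo_ctor);
            return foo_cache ? 0 : -ENOMEM;
    }

    static struct foo *foo_lookup(unsigned long key)
    {
            struct foo *f;
    again:
            rcu_read_lock();
            f = item_lookup(key);   /* any lockless lookup structure (hypothetical) */
            /* Reference first (acquire), identity check second: */
            if (f && !refcount_inc_not_zero_acquire(&f->ref))
                    f = NULL;       /* object was being freed; treat as a miss */
            rcu_read_unlock();
            if (f && READ_ONCE(f->key) != key) {
                    foo_put(f);     /* hypothetical refcount release helper */
                    goto again;
            }
            return f;
    }
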
+/* Trace allocations and frees */
+#define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)

 /* Flag to prevent checks on free */
 #ifdef CONFIG_DEBUG_OBJECTS
-# define SLAB_DEBUG_OBJECTS	0x00400000UL
+# define SLAB_DEBUG_OBJECTS	__SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS)
 #else
-# define SLAB_DEBUG_OBJECTS	0x00000000UL
+# define SLAB_DEBUG_OBJECTS	__SLAB_FLAG_UNUSED
 #endif

-#define SLAB_NOLEAKTRACE	0x00800000UL	/* Avoid kmemleak tracing */
+/* Avoid kmemleak tracing */
+#define SLAB_NOLEAKTRACE	__SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE)

-/* Don't track use of uninitialized memory */
-#ifdef CONFIG_KMEMCHECK
-# define SLAB_NOTRACK		0x01000000UL
+/*
+ * Prevent merging with compatible kmem caches. This flag should be used
+ * cautiously. Valid use cases:
+ *
+ * - caches created for self-tests (e.g. kunit)
+ * - general caches created and used by a subsystem, only when a
+ *   (subsystem-specific) debug option is enabled
+ * - performance critical caches, should be very rare and consulted with slab
+ *   maintainers, and not used together with CONFIG_SLUB_TINY
+ */
+#define SLAB_NO_MERGE		__SLAB_FLAG_BIT(_SLAB_NO_MERGE)
+
+/* Fault injection mark */
+#ifdef CONFIG_FAILSLAB
+# define SLAB_FAILSLAB		__SLAB_FLAG_BIT(_SLAB_FAILSLAB)
 #else
-# define SLAB_NOTRACK		0x00000000UL
+# define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
 #endif

-#ifdef CONFIG_FAILSLAB
-# define SLAB_FAILSLAB		0x02000000UL	/* Fault injection mark */
+/**
+ * define SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * All object allocations from this cache will be memcg accounted, regardless of
+ * __GFP_ACCOUNT being or not being passed to individual allocations.
+ */
+#ifdef CONFIG_MEMCG
+# define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
 #else
-# define SLAB_FAILSLAB		0x00000000UL
+# define SLAB_ACCOUNT		__SLAB_FLAG_UNUSED
+#endif
+
+#ifdef CONFIG_KASAN_GENERIC
+#define SLAB_KASAN		__SLAB_FLAG_BIT(_SLAB_KASAN)
+#else
+#define SLAB_KASAN		__SLAB_FLAG_UNUSED
+#endif
+
+/*
+ * Ignore user specified debugging flags.
+ * Intended for caches created for self-tests so they have only flags
+ * specified in the code and other flags are ignored.
+ */
+#define SLAB_NO_USER_FLAGS	__SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS)
+
+#ifdef CONFIG_KFENCE
+#define SLAB_SKIP_KFENCE	__SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE)
+#else
+#define SLAB_SKIP_KFENCE	__SLAB_FLAG_UNUSED
 #endif

 /* The following flags affect the page allocator grouping pages by mobility */
-#define SLAB_RECLAIM_ACCOUNT	0x00020000UL	/* Objects are reclaimable */
+/**
+ * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * Use this flag for caches that have an associated shrinker. As a result, slab
+ * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
+ * mobility, and are accounted in SReclaimable counter in /proc/meminfo
+ */
+#ifndef CONFIG_SLUB_TINY
+#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
+#else
+#define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_UNUSED
+#endif
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
+
+/* Slab created using create_boot_cache */
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define SLAB_NO_OBJ_EXT		__SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
+#else
+#define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
+#endif

 /*
  * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
  *
@@ -94,23 +257,215 @@
 #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
				(unsigned long)ZERO_SIZE_PTR)

+#include <linux/kasan.h>
+
+struct list_lru;
 struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
-void __init kmem_cache_init(void);
-int slab_is_available(void);
+bool slab_is_available(void);
+
+/**
+ * struct kmem_cache_args - Less common arguments for kmem_cache_create()
+ *
+ * Any uninitialized fields of the structure are interpreted as unused. The
+ * exception is @freeptr_offset, where %0 is a valid value, so
+ * @use_freeptr_offset must also be set to %true in order to interpret the
+ * field as used. For @useroffset, %0 is also valid, but only with a non-%0
+ * @usersize.
+ *
+ * When %NULL args is passed to kmem_cache_create(), it is equivalent to all
+ * fields being unused.
+ */
+struct kmem_cache_args {
+	/**
+	 * @align: The required alignment for the objects.
+	 *
+	 * %0 means no specific alignment is requested.
+	 */
+	unsigned int align;
+	/**
+	 * @useroffset: Usercopy region offset.
+	 *
+	 * %0 is a valid offset, when @usersize is non-%0
+	 */
+	unsigned int useroffset;
+	/**
+	 * @usersize: Usercopy region size.
+	 *
+	 * %0 means no usercopy region is specified.
+	 */
+	unsigned int usersize;
+	/**
+	 * @freeptr_offset: Custom offset for the free pointer
+	 * in &SLAB_TYPESAFE_BY_RCU caches
+	 *
+	 * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+	 * outside of the object. This might cause the object to grow in size.
+	 * Cache creators that have a reason to avoid this can specify a custom
+	 * free pointer offset in their struct where the free pointer will be
+	 * placed.
+	 *
+	 * Note that placing the free pointer inside the object requires the
+	 * caller to ensure that no fields are invalidated that are required to
+	 * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+	 * details).
+	 *
+	 * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+	 * is specified, %use_freeptr_offset must be set to %true.
+	 *
+	 * Note that @ctor currently isn't supported with custom free pointers
+	 * as a @ctor requires an external free pointer.
+	 */
+	unsigned int freeptr_offset;
+	/**
+	 * @use_freeptr_offset: Whether a @freeptr_offset is used.
+	 */
+	bool use_freeptr_offset;
+	/**
+	 * @ctor: A constructor for the objects.
+	 *
+	 * The constructor is invoked for each object in a newly allocated slab
+	 * page. It is the cache user's responsibility to free objects in the
+	 * same state as after calling the constructor, or deal appropriately
+	 * with any differences between a freshly constructed and a reallocated
+	 * object.
+	 *
+	 * %NULL means no constructor.
+	 */
+	void (*ctor)(void *);
+	/**
+	 * @sheaf_capacity: Enable sheaves of given capacity for the cache.
+	 *
+	 * With a non-zero value, allocations from the cache go through caching
+	 * arrays called sheaves. Each cpu has a main sheaf that's always
+	 * present, and a spare sheaf that may not be present. When both become
+	 * empty, there's an attempt to replace an empty sheaf with a full sheaf
+	 * from the per-node barn.
+	 *
+	 * When no full sheaf is available, and gfp flags allow blocking, a
+	 * sheaf is allocated and filled from slab(s) using bulk allocation.
+	 * Otherwise the allocation falls back to the normal operation
+	 * allocating a single object from a slab.
+	 *
+	 * Analogously, when freeing and both percpu sheaves are full, the barn
+	 * may replace one of them with an empty sheaf, unless it's over
+	 * capacity. In that case a sheaf is bulk freed to slab pages.
+	 *
+	 * The sheaves do not enforce NUMA placement of objects, so allocations
+	 * via kmem_cache_alloc_node() with a node specified other than
+	 * NUMA_NO_NODE will bypass them.
+	 *
+	 * Bulk allocation and free operations also try to use the cpu sheaves
+	 * and barn, but fall back to using slab pages directly.
+	 *
+	 * When slub_debug is enabled for the cache, the sheaf_capacity argument
+	 * is ignored.
+	 *
+	 * %0 means no sheaves will be created.
+	 */
+	unsigned int sheaf_capacity;
+};
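
The @freeptr_offset machinery is easiest to see in use. A minimal sketch, with a hypothetical struct rcu_msg whose scratch field is never read once the object becomes freeable and can therefore host the free pointer (note the absence of a .ctor, which the field documentation above rules out for custom free pointers):

    struct rcu_msg {
            refcount_t ref;
            unsigned long seq;
            void *scratch;          /* dead on all free paths */
    };

    static struct kmem_cache *rcu_msg_cache;

    static int __init rcu_msg_init(void)
    {
            struct kmem_cache_args args = {
                    .freeptr_offset     = offsetof(struct rcu_msg, scratch),
                    .use_freeptr_offset = true, /* required even for offset 0 */
            };

            rcu_msg_cache = kmem_cache_create("rcu_msg",
                                              sizeof(struct rcu_msg),
                                              &args, SLAB_TYPESAFE_BY_RCU);
            return rcu_msg_cache ? 0 : -ENOMEM;
    }
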
+
+struct kmem_cache *__kmem_cache_create_args(const char *name,
+					    unsigned int object_size,
+					    struct kmem_cache_args *args,
+					    slab_flags_t flags);
+static inline struct kmem_cache *
+__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
+		    slab_flags_t flags, void (*ctor)(void *))
+{
+	struct kmem_cache_args kmem_args = {
+		.align	= align,
+		.ctor	= ctor,
+	};
+
+	return __kmem_cache_create_args(name, size, &kmem_args, flags);
+}
+
+/**
+ * kmem_cache_create_usercopy - Create a kmem cache with a region suitable
+ * for copying to userspace.
+ * @name: A string which is used in /proc/slabinfo to identify this cache.
+ * @size: The size of objects to be created in this cache.
+ * @align: The required alignment for the objects.
+ * @flags: SLAB flags
+ * @useroffset: Usercopy region offset
+ * @usersize: Usercopy region size
+ * @ctor: A constructor for the objects, or %NULL.
+ *
+ * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY()
+ * if whitelisting a single field is sufficient, or kmem_cache_create() with
+ * the necessary parameters passed via the args parameter (see
+ * &struct kmem_cache_args)
+ *
+ * Return: a pointer to the cache on success, NULL on failure.
+ */
+static inline struct kmem_cache *
+kmem_cache_create_usercopy(const char *name, unsigned int size,
+			   unsigned int align, slab_flags_t flags,
+			   unsigned int useroffset, unsigned int usersize,
+			   void (*ctor)(void *))
+{
+	struct kmem_cache_args kmem_args = {
+		.align		= align,
+		.ctor		= ctor,
+		.useroffset	= useroffset,
+		.usersize	= usersize,
+	};
+
+	return __kmem_cache_create_args(name, size, &kmem_args, flags);
+}
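
Tying the sheaf_capacity documentation above together, a sketch of creating a cache with per-cpu sheaves through the args struct; struct request_ctx, req_cache and the capacity value 32 are illustrative assumptions, not values from this header:

    static struct kmem_cache *req_cache;

    static int __init req_cache_init(void)
    {
            struct kmem_cache_args args = {
                    .sheaf_capacity = 32,   /* per-cpu main + spare sheaves */
            };

            req_cache = kmem_cache_create("request_ctx",
                                          sizeof(struct request_ctx),
                                          &args, 0);
            return req_cache ? 0 : -ENOMEM;
    }

    /* Per the documentation above, node-constrained allocations such as
     * kmem_cache_alloc_node(req_cache, GFP_KERNEL, nid) bypass the sheaves. */
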
+
+/* If NULL is passed for @args, use this variant with default arguments. */
+static inline struct kmem_cache *
+__kmem_cache_default_args(const char *name, unsigned int size,
+			  struct kmem_cache_args *args,
+			  slab_flags_t flags)
+{
+	struct kmem_cache_args kmem_default_args = {};
+
+	/* Make sure we don't get passed garbage. */
+	if (WARN_ON_ONCE(args))
+		return ERR_PTR(-EINVAL);
+
+	return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
+}
+
+/**
+ * kmem_cache_create - Create a kmem cache.
+ * @__name: A string which is used in /proc/slabinfo to identify this cache.
+ * @__object_size: The size of objects to be created in this cache.
+ * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL
+ *	    means defaults will be used for all the arguments.
+ *
+ * This is currently implemented as a macro using ``_Generic()`` to call
+ * either the new variant of the function, or a legacy one.
+ *
+ * The new variant has 4 parameters:
+ * ``kmem_cache_create(name, object_size, args, flags)``
+ *
+ * See __kmem_cache_create_args() which implements this.
+ *
+ * The legacy variant has 5 parameters:
+ * ``kmem_cache_create(name, object_size, align, flags, ctor)``
+ *
+ * The align and ctor parameters map to the respective fields of
+ * &struct kmem_cache_args.
+ *
+ * Context: Cannot be called within an interrupt, but can be interrupted.
+ *
+ * Return: a pointer to the cache on success, NULL on failure.
+ */
+#define kmem_cache_create(__name, __object_size, __args, ...)		\
+	_Generic((__args),						\
+		struct kmem_cache_args *: __kmem_cache_create_args,	\
+		void *: __kmem_cache_default_args,			\
+		default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)

-struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
-			unsigned long,
-			void (*)(void *));
-struct kmem_cache *
-kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
-			unsigned long, void (*)(void *), struct kmem_cache *);
-void kmem_cache_destroy(struct kmem_cache *);
-int kmem_cache_shrink(struct kmem_cache *);
-void kmem_cache_free(struct kmem_cache *, void *);
+void kmem_cache_destroy(struct kmem_cache *s);
+int kmem_cache_shrink(struct kmem_cache *s);
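
The _Generic() dispatch above accepts three call shapes. A sketch of all of them; struct obj_a/obj_b/obj_c are hypothetical types used only to show the selection:

    struct kmem_cache *c1, *c2, *c3;
    struct kmem_cache_args args = {
            .align    = 64,
            .usersize = 32,         /* useroffset 0 is valid with usersize != 0 */
    };

    /* New 4-parameter form: third argument is struct kmem_cache_args *. */
    c1 = kmem_cache_create("obj_a", sizeof(struct obj_a), &args, SLAB_PANIC);

    /* NULL args: routed to __kmem_cache_default_args(), all defaults used. */
    c2 = kmem_cache_create("obj_b", sizeof(struct obj_b), NULL, 0);

    /* Legacy 5-parameter form: the third argument is an integer alignment,
     * so _Generic falls through to __kmem_cache_create(). */
    c3 = kmem_cache_create("obj_c", sizeof(struct obj_c),
                           __alignof__(struct obj_c), 0, NULL);
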

 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -120,109 +475,130 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * f.e. add ____cacheline_aligned_in_smp to the struct declaration
  * then the objects will be properly aligned in SMP configurations.
  */
-#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
-		sizeof(struct __struct), __alignof__(struct __struct),\
-		(__flags), NULL)
+#define KMEM_CACHE(__struct, __flags)					\
+	__kmem_cache_create_args(#__struct, sizeof(struct __struct),	\
+			&(struct kmem_cache_args) {			\
+				.align	= __alignof__(struct __struct), \
+			}, (__flags))
+
+/*
+ * To whitelist a single field for copying to/from usercopy, use this
+ * macro instead of KMEM_CACHE() above.
+ */
+#define KMEM_CACHE_USERCOPY(__struct, __flags, __field)			\
+	__kmem_cache_create_args(#__struct, sizeof(struct __struct),	\
+			&(struct kmem_cache_args) {			\
+				.align		= __alignof__(struct __struct), \
+				.useroffset	= offsetof(struct __struct, __field), \
+				.usersize	= sizeof_field(struct __struct, __field), \
+			}, (__flags))

 /*
  * Common kmalloc functions provided by all allocators
  */
-void * __must_check __krealloc(const void *, size_t, gfp_t);
-void * __must_check krealloc(const void *, size_t, gfp_t);
-void kfree(const void *);
-void kzfree(const void *);
-size_t ksize(const void *);
+void * __must_check krealloc_node_align_noprof(const void *objp, size_t new_size,
+					       unsigned long align,
+					       gfp_t flags, int nid) __realloc_size(2);
+#define krealloc_noprof(_o, _s, _f)	krealloc_node_align_noprof(_o, _s, 1, _f, NUMA_NO_NODE)
+#define krealloc_node_align(...)	alloc_hooks(krealloc_node_align_noprof(__VA_ARGS__))
+#define krealloc_node(_o, _s, _f, _n)	krealloc_node_align(_o, _s, 1, _f, _n)
+#define krealloc(...)			krealloc_node(__VA_ARGS__, NUMA_NO_NODE)
+
+void kfree(const void *objp);
+void kfree_nolock(const void *objp);
+void kfree_sensitive(const void *objp);
+size_t __ksize(const void *objp);
+
+DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
+DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
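
The DEFINE_FREE() declarations above pair with the scoped-cleanup helpers from <linux/cleanup.h>. A minimal sketch of a caller using the __free(kfree) attribute; parse_blob() and the magic byte are hypothetical:

    static int parse_blob(const u8 *src, size_t len)
    {
            u8 *buf __free(kfree) = kmalloc(len, GFP_KERNEL);

            if (!buf)
                    return -ENOMEM;

            memcpy(buf, src, len);
            if (buf[0] != 0x7f)
                    return -EINVAL; /* buf is kfree()d automatically here */

            /* Early returns are safe on every path; the IS_ERR_OR_NULL()
             * guard in DEFINE_FREE(kfree, ...) above makes the cleanup a
             * no-op for NULL or error pointers. */
            return 0;
    }
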
+
+/**
+ * ksize - Report actual allocation size of associated object
+ *
+ * @objp: Pointer returned from a prior kmalloc()-family allocation.
+ *
+ * This should not be used for writing beyond the originally requested
+ * allocation size. Either use krealloc() or round up the allocation size
+ * with kmalloc_size_roundup() prior to allocation. If this is used to
+ * access beyond the originally requested allocation size, UBSAN_BOUNDS
+ * and/or FORTIFY_SOURCE may trip, since they only know about the
+ * originally allocated size via the __alloc_size attribute.
+ */
+size_t ksize(const void *objp);
+
+#ifdef CONFIG_PRINTK
+bool kmem_dump_obj(void *object);
+#else
+static inline bool kmem_dump_obj(void *object) { return false; }
+#endif

 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
- * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ * Setting ARCH_DMA_MINALIGN in arch headers allows that.
  */
-#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
+#ifdef ARCH_HAS_DMA_MINALIGN
+#if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN)
 #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
-#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
-#else
+#endif
+#endif
+
+#ifndef ARCH_KMALLOC_MINALIGN
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#elif ARCH_KMALLOC_MINALIGN > 8
+#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
+#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
 #endif

-#ifdef CONFIG_SLOB
 /*
- * Common fields provided in kmem_cache by all slab allocators
- * This struct is either used directly by the allocator (SLOB)
- * or the allocator must include definitions for all fields
- * provided in kmem_cache_common in their definition of kmem_cache.
- *
- * Once we can do anonymous structs (C11 standard) we could put a
- * anonymous struct definition in these allocators so that the
- * separate allocations in the kmem_cache structure of SLAB and
- * SLUB is no longer needed.
- */
-struct kmem_cache {
-	unsigned int object_size;/* The original size of the object */
-	unsigned int size;	/* The aligned/padded/added on size */
-	unsigned int align;	/* Alignment as calculated */
-	unsigned long flags;	/* Active flags on the slab */
-	const char *name;	/* Slab name for sysfs */
-	int refcount;		/* Use counter */
-	void (*ctor)(void *);	/* Called on object slot creation */
-	struct list_head list;	/* List of all slab caches on the system */
-};
-
-#endif /* CONFIG_SLOB */
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif

 /*
- * Kmalloc array related definitions
+ * Arches can define this function if they want to decide the minimum slab
+ * alignment at runtime. The value returned by the function must be a power
+ * of two and >= ARCH_SLAB_MINALIGN.
  */
+#ifndef arch_slab_minalign
+static inline unsigned int arch_slab_minalign(void)
+{
+	return ARCH_SLAB_MINALIGN;
+}
+#endif

-#ifdef CONFIG_SLAB
 /*
- * The largest kmalloc size supported by the SLAB allocators is
- * 32 megabyte (2^25) or the maximum allocatable page order if that is
- * less than 32 MB.
- *
- * WARNING: Its not easy to increase this value since the allocators have
- * to do various tricks to work around compiler limitations in order to
- * ensure proper constant folding.
+ * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
+ * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
+ * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
  */
-#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
-				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
-#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
-#ifndef KMALLOC_SHIFT_LOW
-#define KMALLOC_SHIFT_LOW	5
-#endif
-#endif
+#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
+#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
+#define __assume_page_alignment __assume_aligned(PAGE_SIZE)

-#ifdef CONFIG_SLUB
 /*
- * SLUB allocates up to order 2 pages directly and otherwise
- * passes the request to the page allocator.
+ * Kmalloc array related definitions
  */
-#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
-#define KMALLOC_SHIFT_MAX	(MAX_ORDER + PAGE_SHIFT)
-#ifndef KMALLOC_SHIFT_LOW
-#define KMALLOC_SHIFT_LOW	3
-#endif
-#endif

-#ifdef CONFIG_SLOB
 /*
- * SLOB passes all page size and larger requests to the page allocator.
- * No kmalloc array is necessary since objects of different sizes can
- * be allocated from the same page.
+ * SLUB directly allocates requests fitting in to an order-1 page
+ * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
  */
-#define KMALLOC_SHIFT_MAX	30
-#define KMALLOC_SHIFT_HIGH	PAGE_SHIFT
+#define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_MAX	(MAX_PAGE_ORDER + PAGE_SHIFT)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	3
 #endif
-#endif

 /* Maximum allocatable size */
 #define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_MAX)
 /* Maximum size for which we actually use a slab cache */
 #define KMALLOC_MAX_CACHE_SIZE	(1UL << KMALLOC_SHIFT_HIGH)
-/* Maximum order allocatable via the slab allocagtor */
+/* Maximum order allocatable via the slab allocator */
 #define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_MAX - PAGE_SHIFT)

 /*
@@ -232,21 +608,114 @@ struct kmem_cache {
 #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
 #endif
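
Worked numbers for the common 4 KiB page configuration (PAGE_SHIFT == 12), as a sanity check of the definitions above; the static_assert follows directly from KMALLOC_SHIFT_HIGH == PAGE_SHIFT + 1:

    /* KMALLOC_SHIFT_HIGH     = PAGE_SHIFT + 1 = 13
     * KMALLOC_MAX_CACHE_SIZE = 1UL << 13      = 8 KiB (== 2 * PAGE_SIZE):
     * larger requests skip the kmalloc slab caches and go straight to the
     * page allocator, up to KMALLOC_MAX_SIZE = 1UL << (MAX_PAGE_ORDER + 12).
     * With KMALLOC_SHIFT_LOW == 3, the smallest bucket is 1 << 3 = 8 bytes. */
    static_assert(KMALLOC_MAX_CACHE_SIZE == 2 * PAGE_SIZE);
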

-#ifndef CONFIG_SLOB
-extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+/*
+ * This restriction comes from byte sized index implementation.
+ * Page size is normally 2^12 bytes and, in this case, if we want to use
+ * a byte sized index which can represent 2^8 entries, the size of the object
+ * should be equal to or greater than 2^12 / 2^8 = 2^4 = 16.
+ * If the minimum size of kmalloc is less than 16, we use it as the minimum
+ * object size and give up on using a byte sized index.
+ */
+#define SLAB_OBJ_MIN_SIZE	(KMALLOC_MIN_SIZE < 16 ? \
+				(KMALLOC_MIN_SIZE) : 16)
+
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define RANDOM_KMALLOC_CACHES_NR	15 // # of cache copies
+#else
+#define RANDOM_KMALLOC_CACHES_NR	0
+#endif
+
+/*
+ * Whenever changing this, take care that kmalloc_type() and
+ * create_kmalloc_caches() still work as intended.
+ *
+ * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP
+ * is for accounted but unreclaimable and non-dma objects. All the other
+ * kmem caches can have both accounted and unaccounted objects.
+ */
+enum kmalloc_cache_type {
+	KMALLOC_NORMAL = 0,
+#ifndef CONFIG_ZONE_DMA
+	KMALLOC_DMA = KMALLOC_NORMAL,
+#endif
+#ifndef CONFIG_MEMCG
+	KMALLOC_CGROUP = KMALLOC_NORMAL,
+#endif
+	KMALLOC_RANDOM_START = KMALLOC_NORMAL,
+	KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
+#ifdef CONFIG_SLUB_TINY
+	KMALLOC_RECLAIM = KMALLOC_NORMAL,
+#else
+	KMALLOC_RECLAIM,
+#endif
 #ifdef CONFIG_ZONE_DMA
-extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
+	KMALLOC_DMA,
+#endif
+#ifdef CONFIG_MEMCG
+	KMALLOC_CGROUP,
+#endif
+	NR_KMALLOC_TYPES
+};
+
+typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
+
+extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
+
+/*
+ * Define gfp bits that should not be set for KMALLOC_NORMAL.
+ */
+#define KMALLOC_NOT_NORMAL_BITS					\
+	(__GFP_RECLAIMABLE |					\
+	(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) |		\
+	(IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0))
+
+extern unsigned long random_kmalloc_seed;
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
+{
+	/*
+	 * The most common case is KMALLOC_NORMAL, so test for it
+	 * with a single branch for all the relevant flags.
+	 */
+	if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+		/* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
+		return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
+						      ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#else
+		return KMALLOC_NORMAL;
 #endif
+
+	/*
+	 * At least one of the flags has to be set. Their priorities in
+	 * decreasing order are:
+	 *  1) __GFP_DMA
+	 *  2) __GFP_RECLAIMABLE
+	 *  3) __GFP_ACCOUNT
+	 */
+	if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
+		return KMALLOC_DMA;
+	if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE))
+		return KMALLOC_RECLAIM;
+	else
+		return KMALLOC_CGROUP;
+}
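
The effect of kmalloc_type() is easiest to read off as a flag-to-bucket table. A sketch, ignoring CONFIG_RANDOM_KMALLOC_CACHES (which spreads KMALLOC_NORMAL over RANDOM_KMALLOC_CACHES_NR + 1 copies keyed by caller address); the final lookup line mirrors the indexing done by kmalloc() itself:

    /* kmalloc(n, GFP_KERNEL)                      -> KMALLOC_NORMAL
     * kmalloc(n, GFP_KERNEL | __GFP_ACCOUNT)      -> KMALLOC_CGROUP
     * kmalloc(n, GFP_KERNEL | __GFP_RECLAIMABLE)  -> KMALLOC_RECLAIM
     * kmalloc(n, GFP_KERNEL | __GFP_DMA)          -> KMALLOC_DMA
     *
     * Priority when several bits are set: DMA > RECLAIMABLE > ACCOUNT,
     * mirroring the if/else chain in kmalloc_type() above. */
    struct kmem_cache *s =
            kmalloc_caches[kmalloc_type(GFP_KERNEL, _RET_IP_)][kmalloc_index(64)];
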
+
 /*
  * Figure out which kmalloc slab an allocation of a certain size
  * belongs to.
  * 0 = zero alloc
  * 1 =  65 .. 96 bytes
- * 2 = 120 .. 192 bytes
- * n = 2^(n-1) .. 2^n -1
+ * 2 = 129 .. 192 bytes
+ * n = 2^(n-1)+1 .. 2^n
+ *
+ * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized;
+ * typical usage is via kmalloc_index() and therefore evaluated at compile-time.
+ * Callers where !size_is_constant should only be test modules, where runtime
+ * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab().
  */
-static __always_inline int kmalloc_index(size_t size)
+static __always_inline unsigned int __kmalloc_index(size_t size,
+						    bool size_is_constant)
 {
 	if (!size)
 		return 0;
@@ -277,209 +746,309 @@ static __always_inline int kmalloc_index(size_t size)
 	if (size <=  512 * 1024) return 19;
 	if (size <= 1024 * 1024) return 20;
 	if (size <=  2 * 1024 * 1024) return 21;
-	if (size <=  4 * 1024 * 1024) return 22;
-	if (size <=  8 * 1024 * 1024) return 23;
-	if (size <= 16 * 1024 * 1024) return 24;
-	if (size <= 32 * 1024 * 1024) return 25;
-	if (size <= 64 * 1024 * 1024) return 26;
-	BUG();
+
+	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
+		BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
+	else
+		BUG(); /* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
-#endif /* !CONFIG_SLOB */
+static_assert(PAGE_SHIFT <= 20);
+#define kmalloc_index(s) __kmalloc_index(s, true)

-#ifdef CONFIG_SLAB
-#include <linux/slab_def.h>
-#endif
+#include <linux/alloc_tag.h>

-#ifdef CONFIG_SLUB
-#include <linux/slub_def.h>
-#endif
+/**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache.
+ * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
+ *
+ * Return: pointer to the new object or %NULL in case of error
+ */
+void *kmem_cache_alloc_noprof(struct kmem_cache *cachep,
+			      gfp_t flags) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc(...)	alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__))

-#ifdef CONFIG_SLOB
-#include <linux/slob_def.h>
-#endif
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
+				  gfp_t gfpflags) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_lru(...)	alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))
+
+/**
+ * kmem_cache_charge - memcg charge an already allocated slab memory
+ * @objp: address of the slab object to memcg charge
+ * @gfpflags: describe the allocation context
+ *
+ * kmem_cache_charge allows charging a slab object to the current memcg,
+ * primarily in cases where charging at allocation time might not be possible
+ * because the target memcg is not known (i.e. softirq context)
+ *
+ * The objp should be a pointer returned by the slab allocator functions like
+ * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge
+ * behavior can be controlled through the gfpflags parameter, which affects how
+ * the necessary internal metadata can be allocated. Including __GFP_NOFAIL
+ * denotes that overcharging is requested instead of failure, but is not
+ * applied for the internal metadata allocation.
+ *
+ * There are several cases where it will return true even if the charging was
+ * not done. More specifically:
+ *
+ * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems.
+ * 2. Already charged slab objects.
+ * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc()
+ *    without __GFP_ACCOUNT
+ * 4. Allocating internal metadata has failed
+ *
+ * Return: true if charge was successful otherwise false.
+ */
+bool kmem_cache_charge(void *objp, gfp_t gfpflags);
+void kmem_cache_free(struct kmem_cache *s, void *objp);
+
+kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
+				  unsigned int useroffset, unsigned int usersize,
+				  void (*ctor)(void *));

 /*
- * Determine size used for the nth kmalloc cache.
- * return size or 0 if a kmalloc cache for that
- * size does not exist
+ * Bulk allocation and freeing operations. These are accelerated in an
+ * allocator specific way to avoid taking locks repeatedly or building
+ * metadata structures unnecessarily.
+ *
+ * Note that interrupts must be enabled when calling these functions.
  */
-static __always_inline int kmalloc_size(int n)
-{
-#ifndef CONFIG_SLOB
-	if (n > 2)
-		return 1 << n;
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);

-	if (n == 1 && KMALLOC_MIN_SIZE <= 32)
-		return 96;
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
+#define kmem_cache_alloc_bulk(...)	alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))

-	if (n == 2 && KMALLOC_MIN_SIZE <= 64)
-		return 192;
-#endif
-	return 0;
+static __always_inline void kfree_bulk(size_t size, void **p)
+{
+	kmem_cache_free_bulk(NULL, size, p);
 }
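
A usage sketch for the bulk API; msg_cache is a hypothetical cache, and per the comment above interrupts must be enabled at the call sites:

    void *objs[16];
    int got;

    got = kmem_cache_alloc_bulk(msg_cache, GFP_KERNEL, ARRAY_SIZE(objs), objs);
    if (!got)
            return -ENOMEM; /* a zero return means the bulk allocation failed */

    /* ... use objs[0] .. objs[got - 1] ... */

    kmem_cache_free_bulk(msg_cache, got, objs);

    /* kfree_bulk() above is the kmalloc()-side counterpart: passing a NULL
     * cache lets it free an array of kmalloc'ed pointers in one call. */
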

+void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
+				   int node) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
+
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
+
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+			    struct slab_sheaf **sheafp, unsigned int size);
+
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+			     struct slab_sheaf *sheaf);
+
+void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *cachep, gfp_t gfp,
+					 struct slab_sheaf *sheaf) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_from_sheaf(...)	\
+		alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__))
+
+unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf);
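
A sketch of the prefilled-sheaf flow: reserving capacity ahead of a section that should not block, then allocating from the reservation. req_cache is the hypothetical cache from earlier; the NULL-on-failure convention shown here is an assumption, not something this header specifies, so check the implementation:

    struct slab_sheaf *sheaf;
    void *obj;

    sheaf = kmem_cache_prefill_sheaf(req_cache, GFP_KERNEL, 8);
    if (!sheaf)             /* assumed failure convention */
            return -ENOMEM;

    /* Later, possibly where reclaim must not happen: */
    obj = kmem_cache_alloc_from_sheaf(req_cache, GFP_ATOMIC, sheaf);

    /* kmem_cache_sheaf_size() reports remaining objects; hand unused
     * capacity back when done: */
    kmem_cache_return_sheaf(req_cache, GFP_KERNEL, sheaf);
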

 /*
- * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
- * Intended for arches that get misalignment faults even for 64 bit integer
- * aligned buffers.
+ * These macros allow declaring a kmem_buckets * parameter alongside size, which
+ * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
+ * sites don't have to pass NULL.
  */
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#ifdef CONFIG_SLAB_BUCKETS
+#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size), kmem_buckets *(_b)
+#define PASS_BUCKET_PARAMS(_size, _b)	(_size), (_b)
+#define PASS_BUCKET_PARAM(_b)		(_b)
+#else
+#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size)
+#define PASS_BUCKET_PARAMS(_size, _b)	(_size)
+#define PASS_BUCKET_PARAM(_b)		NULL
 #endif
+
 /*
- * This is the main placeholder for memcg-related information in kmem caches.
- * struct kmem_cache will hold a pointer to it, so the memory cost while
- * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
- * would otherwise be if that would be bundled in kmem_cache: we'll need an
- * extra pointer chase. But the trade off clearly lays in favor of not
- * penalizing non-users.
- *
- * Both the root cache and the child caches will have it. For the root cache,
- * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system.
- *
- * Child caches will hold extra metadata needed for its operation. Fields are:
- *
- * @memcg: pointer to the memcg this cache belongs to
- * @list: list_head for the list of all caches in this memcg
- * @root_cache: pointer to the global, root cache, this cache was derived from
- * @dead: set to true after the memcg dies; the cache may still be around.
- * @nr_pages: number of pages that belongs to this cache.
- * @destroy: worker to be called whenever we are ready, or believe we may be
- *           ready, to destroy this cache.
- */
-struct memcg_cache_params {
-	bool is_root_cache;
-	union {
-		struct kmem_cache *memcg_caches[0];
-		struct {
-			struct mem_cgroup *memcg;
-			struct list_head list;
-			struct kmem_cache *root_cache;
-			bool dead;
-			atomic_t nr_pages;
-			struct work_struct destroy;
-		};
-	};
-};
+ * The following functions are not to be used directly and are intended only
+ * for internal use from kmalloc() and kmalloc_node(),
+ * with the exception of kunit tests.
+ */
+
+void *__kmalloc_noprof(size_t size, gfp_t flags)
+				__assume_kmalloc_alignment __alloc_size(1);
+
+void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+				__assume_kmalloc_alignment __alloc_size(1);

-int memcg_update_all_caches(int num_memcgs);
+void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
+				__assume_kmalloc_alignment __alloc_size(3);

-struct seq_file;
-int cache_show(struct kmem_cache *s, struct seq_file *m);
-void print_slabinfo_header(struct seq_file *m);
+void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
+				  int node, size_t size)
+				__assume_kmalloc_alignment __alloc_size(4);
+
+void *__kmalloc_large_noprof(size_t size, gfp_t flags)
+				__assume_page_alignment __alloc_size(1);
+
+void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
+				__assume_page_alignment __alloc_size(1);

 /**
- * kmalloc - allocate memory
+ * kmalloc - allocate kernel memory
  * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * The @flags argument may be one of:
- *
- * %GFP_USER - Allocate memory on behalf of user.  May sleep.
+ * @flags: describe the allocation context
  *
- * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
+ * kmalloc is the normal method of allocating memory
+ * for objects smaller than page size in the kernel.
  *
- * %GFP_ATOMIC - Allocation will not sleep.  May use emergency pools.
- *   For example, use this inside interrupt handlers.
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. For @size of power of two bytes, the alignment is also guaranteed
+ * to be at least the size. For other sizes, the alignment is guaranteed to
+ * be at least the largest power-of-two divisor of @size.
  *
- * %GFP_HIGHUSER - Allocate pages from high memory.
+ * The @flags argument may be one of the GFP flags defined at
+ * include/linux/gfp_types.h and described at
+ * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
  *
- * %GFP_NOIO - Do not do any I/O at all while trying to get memory.
+ * The recommended usage of the @flags is described at
+ * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>`
  *
- * %GFP_NOFS - Do not make any fs calls while trying to get memory.
+ * Below is a brief outline of the most useful GFP flags.
  *
- * %GFP_NOWAIT - Allocation will not sleep.
+ * %GFP_KERNEL
+ *	Allocate normal kernel ram. May sleep.
  *
- * %GFP_THISNODE - Allocate node-local memory only.
+ * %GFP_NOWAIT
+ *	Allocation will not sleep.
  *
- * %GFP_DMA - Allocation suitable for DMA.
- *   Should only be used for kmalloc() caches. Otherwise, use a
- *   slab created with SLAB_DMA.
+ * %GFP_ATOMIC
+ *	Allocation will not sleep. May use emergency pools.
  *
  * Also it is possible to set different flags by OR'ing
  * in one or more of the following additional @flags:
  *
- * %__GFP_COLD - Request cache-cold pages instead of
- *   trying to return cache-warm pages.
- *
- * %__GFP_HIGH - This allocation has high priority and may use emergency pools.
+ * %__GFP_ZERO
+ *	Zero the allocated memory before returning. Also see kzalloc().
  *
- * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
- *   (think twice before using).
+ * %__GFP_HIGH
+ *	This allocation has high priority and may use emergency pools.
  *
- * %__GFP_NORETRY - If memory is not immediately available,
- *   then give up at once.
+ * %__GFP_NOFAIL
+ *	Indicate that this allocation is in no way allowed to fail
+ *	(think twice before using).
  *
- * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
+ * %__GFP_NORETRY
+ *	If memory is not immediately available,
+ *	then give up at once.
  *
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_NOWARN
+ *	If allocation fails, don't issue any warnings.
  *
- * There are other flags available as well, but these are not intended
- * for general use, and so are not documented here. For a full list of
- * potential flags, always refer to linux/gfp.h.
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
+ * %__GFP_RETRY_MAYFAIL
+ *	Try really hard to make the allocation succeed, but fail
+ *	eventually.
  */
-static __always_inline void *kmalloc(size_t size, gfp_t flags);
+static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
+{
+	if (__builtin_constant_p(size) && size) {
+		unsigned int index;

-/**
- * kmalloc_array - allocate memory for an array.
- * @n: number of elements.
- * @size: element size.
- * @flags: the type of memory to allocate (see kmalloc).
- */
-static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+		if (size > KMALLOC_MAX_CACHE_SIZE)
+			return __kmalloc_large_noprof(size, flags);
+
+		index = kmalloc_index(size);
+		return __kmalloc_cache_noprof(
+				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
+				flags, size);
+	}
+	return __kmalloc_noprof(size, flags);
+}
+#define kmalloc(...)	alloc_hooks(kmalloc_noprof(__VA_ARGS__))
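
Two practical consequences of the kmalloc() definition above, shown as a sketch: the documented alignment guarantee, and the compile-time constant folding of the size argument.

    /* Power-of-two sizes get natural alignment: */
    void *p = kmalloc(512, GFP_KERNEL);     /* at least 512-byte aligned */

    /* Other sizes are aligned to their largest power-of-two divisor: */
    void *q = kmalloc(768, GFP_KERNEL);     /* 768 = 3 * 256 -> 256-byte aligned */

    /* A constant in-range size folds to a direct cache lookup at compile
     * time (the kmalloc_index() branch of kmalloc_noprof() above); a
     * runtime-variable size takes the out-of-line __kmalloc path instead. */
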
+
+void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node);
+#define kmalloc_nolock(...)	alloc_hooks(kmalloc_nolock_noprof(__VA_ARGS__))
+
+#define kmem_buckets_alloc(_b, _size, _flags)	\
+	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
+
+#define kmem_buckets_alloc_track_caller(_b, _size, _flags)	\
+	alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
+
+static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
 {
-	if (size != 0 && n > SIZE_MAX / size)
-		return NULL;
-	return __kmalloc(n * size, flags);
+	if (__builtin_constant_p(size) && size) {
+		unsigned int index;
+
+		if (size > KMALLOC_MAX_CACHE_SIZE)
+			return __kmalloc_large_node_noprof(size, flags, node);
+
+		index = kmalloc_index(size);
+		return __kmalloc_cache_node_noprof(
+				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
+				flags, node, size);
+	}
+	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
 }
+#define kmalloc_node(...)	alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))

 /**
- * kcalloc - allocate memory for an array. The memory is set to zero.
+ * kmalloc_array - allocate memory for an array.
  * @n: number of elements.
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
 {
-	return kmalloc_array(n, size, flags | __GFP_ZERO);
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
+		return NULL;
+	return kmalloc_noprof(bytes, flags);
 }
+#define kmalloc_array(...)	alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))

-#if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB)
 /**
- * kmalloc_node - allocate memory from a specific node
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kmalloc).
- * @node: node to allocate from.
+ * krealloc_array - reallocate memory for an array.
+ * @p: pointer to the memory chunk to reallocate
+ * @new_n: new number of elements to alloc
+ * @new_size: new size of a single member of the array
+ * @flags: the type of memory to allocate (see kmalloc)
  *
- * kmalloc() for non-local nodes, used to allocate from a specific node
- * if available. Equivalent to kmalloc() in the non-NUMA single-node
- * case.
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * See krealloc_noprof() for further details.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
  */
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p,
+								      size_t new_n,
+								      size_t new_size,
+								      gfp_t flags)
 {
-	return kmalloc(size, flags);
-}
+	size_t bytes;

-static inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
-{
-	return __kmalloc(size, flags);
+	if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+		return NULL;
+
+	return krealloc_noprof(p, bytes, flags);
 }
+#define krealloc_array(...)	alloc_hooks(krealloc_array_noprof(__VA_ARGS__))

-void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
+/**
+ * kcalloc - allocate memory for an array. The memory is set to zero.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
+ */
+#define kcalloc(n, size, flags)		kmalloc_array(n, size, (flags) | __GFP_ZERO)
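
The array helpers exist so that an n * size overflow fails the allocation instead of silently wrapping and under-allocating. A sketch; struct item and nr are hypothetical:

    struct item *items, *grown;

    items = kmalloc_array(nr, sizeof(*items), GFP_KERNEL); /* NULL on overflow */
    if (!items)
            return -ENOMEM;

    /* Contents are preserved up to min(old, new) size; per the krealloc_array
     * documentation above, pass __GFP_ZERO consistently from the very first
     * allocation if the zeroing guarantee must hold. */
    grown = krealloc_array(items, 2 * nr, sizeof(*items), GFP_KERNEL);
    if (!grown) {
            kfree(items);   /* on failure the old buffer is left intact */
            return -ENOMEM;
    }
    items = grown;
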

-static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
-					gfp_t flags, int node)
-{
-	return kmem_cache_alloc(cachep, flags);
-}
-#endif /* !CONFIG_NUMA && !CONFIG_SLOB */
+void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
+					 unsigned long caller) __alloc_size(1);
+#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
+	__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
+#define kmalloc_node_track_caller(...)		\
+	alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))

 /*
  * kmalloc_track_caller is a special version of kmalloc that records the
@@ -489,82 +1058,122 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
-	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING)) || \
-	(defined(CONFIG_SLOB) && defined(CONFIG_TRACING))
-extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
-#define kmalloc_track_caller(size, flags) \
-	__kmalloc_track_caller(size, flags, _RET_IP_)
-#else
-#define kmalloc_track_caller(size, flags) \
-	__kmalloc(size, flags)
-#endif /* DEBUG_SLAB */
+#define kmalloc_track_caller(...)	kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE)

-#ifdef CONFIG_NUMA
-/*
- * kmalloc_node_track_caller is a special version of kmalloc_node that
- * records the calling function of the routine calling it for slab leak
- * tracking instead of just the calling function (confusing, eh?).
- * It's useful when the call to kmalloc_node comes from a widely-used
- * standard allocator where we care about the real place the memory
- * allocation request comes from.
- */
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) || \
-	(defined(CONFIG_SLAB) && defined(CONFIG_TRACING)) || \
-	(defined(CONFIG_SLOB) && defined(CONFIG_TRACING))
-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
-#define kmalloc_node_track_caller(size, flags, node) \
-	__kmalloc_node_track_caller(size, flags, node, \
-			_RET_IP_)
-#else
-#define kmalloc_node_track_caller(size, flags, node) \
-	__kmalloc_node(size, flags, node)
-#endif
+#define kmalloc_track_caller_noprof(...)	\
+		kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_)

-#else /* CONFIG_NUMA */
+static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
+								 int node)
+{
+	size_t bytes;

-#define kmalloc_node_track_caller(size, flags, node) \
-	kmalloc_track_caller(size, flags)
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
+		return NULL;
+	if (__builtin_constant_p(n) && __builtin_constant_p(size))
+		return kmalloc_node_noprof(bytes, flags, node);
+	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
+}
+#define kmalloc_array_node(...)	alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))

-#endif /* CONFIG_NUMA */
+#define kcalloc_node(_n, _size, _flags, _node)	\
+	kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node)

 /*
  * Shortcuts
  */
-static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
-{
-	return kmem_cache_alloc(k, flags | __GFP_ZERO);
-}
+#define kmem_cache_zalloc(_k, _flags)	kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)

 /**
  * kzalloc - allocate memory. The memory is set to zero.
  * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-static inline void *kzalloc(size_t size, gfp_t flags)
+static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
 {
-	return kmalloc(size, flags | __GFP_ZERO);
+	return kmalloc_noprof(size, flags | __GFP_ZERO);
 }
+#define kzalloc(...)				alloc_hooks(kzalloc_noprof(__VA_ARGS__))
+#define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)

-/**
- * kzalloc_node - allocate zeroed memory from a particular memory node.
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kmalloc).
- * @node: memory node from which to allocate
- */
-static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), unsigned long align,
+			     gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node_align_noprof(_size, _align, _flags, _node)	\
+	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, NULL), _align, _flags, _node)
+#define kvmalloc_node_align(...)		\
+	alloc_hooks(kvmalloc_node_align_noprof(__VA_ARGS__))
+#define kvmalloc_node(_s, _f, _n)	kvmalloc_node_align(_s, 1, _f, _n)
+#define kvmalloc(...)			kvmalloc_node(__VA_ARGS__, NUMA_NO_NODE)
+#define kvzalloc(_size, _flags)		kvmalloc(_size, (_flags)|__GFP_ZERO)
+
+#define kvzalloc_node(_size, _flags, _node)	kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
+
+#define kmem_buckets_valloc(_b, _size, _flags)	\
+	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), 1, _flags, NUMA_NO_NODE))
+
+static inline __alloc_size(1, 2) void *
+kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
 {
-	return kmalloc_node(size, flags | __GFP_ZERO, node);
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
+		return NULL;
+
+	return kvmalloc_node_align_noprof(bytes, 1, flags, node);
 }

-/*
- * Determine the size of a slab object
- */
-static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+#define kvmalloc_array_noprof(...)		kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
+#define kvcalloc_node_noprof(_n,_s,_f,_node)	kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node)
+#define kvcalloc_noprof(...)			kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE)
+
+#define kvmalloc_array(...)			alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__))
+#define kvcalloc_node(...)			alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__))
+#define kvcalloc(...)				alloc_hooks(kvcalloc_noprof(__VA_ARGS__))
+
+void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long align,
+				  gfp_t flags, int nid) __realloc_size(2);
+#define kvrealloc_node_align(...)	\
+	alloc_hooks(kvrealloc_node_align_noprof(__VA_ARGS__))
+#define kvrealloc_node(_p, _s, _f, _n)	kvrealloc_node_align(_p, _s, 1, _f, _n)
+#define kvrealloc(...)			kvrealloc_node(__VA_ARGS__, NUMA_NO_NODE)
+
+extern void kvfree(const void *addr);
+DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))
+
+extern void kvfree_sensitive(const void *addr, size_t len);
+
+unsigned int kmem_cache_size(struct kmem_cache *s);
+
+#ifndef CONFIG_KVFREE_RCU_BATCHED
+static inline void kvfree_rcu_barrier(void)
 {
-	return s->object_size;
+	rcu_barrier();
 }

+static inline void kfree_rcu_scheduler_running(void) { }
+#else
+void kvfree_rcu_barrier(void);
+
+void kfree_rcu_scheduler_running(void);
+#endif
+
+/**
+ * kmalloc_size_roundup - Report allocation bucket size for the given size
+ *
+ * @size: Number of bytes to round up from.
+ *
+ * This returns the number of bytes that would be available in a kmalloc()
+ * allocation of @size bytes. For example, a 126 byte request would be
+ * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly
+ * for the general-purpose kmalloc()-based allocations, and is not for the
+ * pre-sized kmem_cache_alloc()-based allocations.)
+ *
+ * Use this to kmalloc() the full bucket size ahead of time instead of using
+ * ksize() to query the size after an allocation.
+ */
+size_t kmalloc_size_roundup(size_t size);
+
 void __init kmem_cache_init_late(void);
+void __init kvfree_rcu_init(void);

 #endif	/* _LINUX_SLAB_H */
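
Finally, a sketch of the pattern kmalloc_size_roundup() is documented for above: sizing the request up front instead of probing with ksize() after the fact. The 128-byte result assumes the default kmalloc bucket layout:

    size_t want = 126;
    size_t len = kmalloc_size_roundup(want);    /* 128 with default buckets */
    u8 *buf = kmalloc(len, GFP_KERNEL);

    if (!buf)
            return -ENOMEM;
    /* All len bytes are usable without tripping UBSAN_BOUNDS or
     * FORTIFY_SOURCE, unlike writing past `want` in a buffer that was
     * allocated with kmalloc(want). */
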
