summaryrefslogtreecommitdiff
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c1740
1 files changed, 1249 insertions, 491 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 1bb2a93cf7b6..31e11ef256f9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -19,6 +19,7 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
+#include <linux/vmalloc.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
@@ -218,6 +219,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
#endif /* CONFIG_SLUB_DEBUG */
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
/* Structure holding parameters for get_partial() call chain */
struct partial_context {
gfp_t flags;
@@ -230,12 +235,6 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
}
-static inline bool slub_debug_orig_size(struct kmem_cache *s)
-{
- return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
- (s->flags & SLAB_KMALLOC));
-}
-
void *fixup_red_left(struct kmem_cache *s, void *p)
{
if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -466,12 +465,6 @@ static struct workqueue_struct *flushwq;
*******************************************************************/
/*
- * freeptr_t represents a SLUB freelist pointer, which might be encoded
- * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
- */
-typedef struct { unsigned long v; } freeptr_t;
-
-/*
* Returns freelist pointer (ptr). With hardening, this is obfuscated
* with an XOR of the address where the pointer is held and a per-cache
* random number.
@@ -557,6 +550,26 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
*(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr);
}
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+ return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+ if (freeptr_outside_object(s))
+ return s->inuse + sizeof(void *);
+ else
+ return s->inuse;
+}
+
/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
for (__p = fixup_red_left(__s, __addr); \
@@ -604,11 +617,21 @@ static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
s->cpu_partial_slabs = nr_slabs;
}
+
+static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
+{
+ return s->cpu_partial_slabs;
+}
#else
static inline void
slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
}
+
+static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
+{
+ return 0;
+}
#endif /* CONFIG_SLUB_CPU_PARTIAL */
/*
@@ -616,18 +639,12 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
*/
static __always_inline void slab_lock(struct slab *slab)
{
- struct page *page = slab_page(slab);
-
- VM_BUG_ON_PAGE(PageTail(page), page);
- bit_spin_lock(PG_locked, &page->flags);
+ bit_spin_lock(PG_locked, &slab->__page_flags);
}
static __always_inline void slab_unlock(struct slab *slab)
{
- struct page *page = slab_page(slab);
-
- VM_BUG_ON_PAGE(PageTail(page), page);
- bit_spin_unlock(PG_locked, &page->flags);
+ bit_spin_unlock(PG_locked, &slab->__page_flags);
}
static inline bool
@@ -732,6 +749,42 @@ static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
return false;
}
+/*
+ * kmalloc caches has fixed sizes (mostly power of 2), and kmalloc() API
+ * family will round up the real request size to these fixed ones, so
+ * there could be an extra area than what is requested. Save the original
+ * request size in the meta data area, for better debug and sanity check.
+ */
+static inline void set_orig_size(struct kmem_cache *s,
+ void *object, unsigned int orig_size)
+{
+ void *p = kasan_reset_tag(object);
+
+ if (!slub_debug_orig_size(s))
+ return;
+
+ p += get_info_end(s);
+ p += sizeof(struct track) * 2;
+
+ *(unsigned int *)p = orig_size;
+}
+
+static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
+{
+ void *p = kasan_reset_tag(object);
+
+ if (is_kfence_address(object))
+ return kfence_ksize(object);
+
+ if (!slub_debug_orig_size(s))
+ return s->object_size;
+
+ p += get_info_end(s);
+ p += sizeof(struct track) * 2;
+
+ return *(unsigned int *)p;
+}
+
#ifdef CONFIG_SLUB_DEBUG
static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
static DEFINE_SPINLOCK(object_map_lock);
@@ -764,6 +817,21 @@ static bool slab_add_kunit_errors(void)
kunit_put_resource(resource);
return true;
}
+
+bool slab_in_kunit_test(void)
+{
+ struct kunit_resource *resource;
+
+ if (!kunit_get_current_test())
+ return false;
+
+ resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
+ if (!resource)
+ return false;
+
+ kunit_put_resource(resource);
+ return true;
+}
#else
static inline bool slab_add_kunit_errors(void) { return false; }
#endif
@@ -805,10 +873,12 @@ static int disable_higher_order_debug;
static inline void metadata_access_enable(void)
{
kasan_disable_current();
+ kmsan_disable_current();
}
static inline void metadata_access_disable(void)
{
+ kmsan_enable_current();
kasan_enable_current();
}
@@ -845,26 +915,6 @@ static void print_section(char *level, char *text, u8 *addr,
metadata_access_disable();
}
-/*
- * See comment in calculate_sizes().
- */
-static inline bool freeptr_outside_object(struct kmem_cache *s)
-{
- return s->offset >= s->inuse;
-}
-
-/*
- * Return offset of the end of info block which is inuse + free pointer if
- * not overlapping with object.
- */
-static inline unsigned int get_info_end(struct kmem_cache *s)
-{
- if (freeptr_outside_object(s))
- return s->inuse + sizeof(void *);
- else
- return s->inuse;
-}
-
static struct track *get_track(struct kmem_cache *s, void *object,
enum track_item alloc)
{
@@ -958,55 +1008,9 @@ void print_tracking(struct kmem_cache *s, void *object)
static void print_slab_info(const struct slab *slab)
{
- struct folio *folio = (struct folio *)slab_folio(slab);
-
pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
slab, slab->objects, slab->inuse, slab->freelist,
- folio_flags(folio, 0));
-}
-
-/*
- * kmalloc caches has fixed sizes (mostly power of 2), and kmalloc() API
- * family will round up the real request size to these fixed ones, so
- * there could be an extra area than what is requested. Save the original
- * request size in the meta data area, for better debug and sanity check.
- */
-static inline void set_orig_size(struct kmem_cache *s,
- void *object, unsigned int orig_size)
-{
- void *p = kasan_reset_tag(object);
- unsigned int kasan_meta_size;
-
- if (!slub_debug_orig_size(s))
- return;
-
- /*
- * KASAN can save its free meta data inside of the object at offset 0.
- * If this meta data size is larger than 'orig_size', it will overlap
- * the data redzone in [orig_size+1, object_size]. Thus, we adjust
- * 'orig_size' to be as at least as big as KASAN's meta data.
- */
- kasan_meta_size = kasan_metadata_size(s, true);
- if (kasan_meta_size > orig_size)
- orig_size = kasan_meta_size;
-
- p += get_info_end(s);
- p += sizeof(struct track) * 2;
-
- *(unsigned int *)p = orig_size;
-}
-
-static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
-{
- void *p = kasan_reset_tag(object);
-
- if (!slub_debug_orig_size(s))
- return s->object_size;
-
- p += get_info_end(s);
- p += sizeof(struct track) * 2;
-
- return *(unsigned int *)p;
+ &slab->__page_flags);
}
void skip_orig_size_check(struct kmem_cache *s, const void *object)
@@ -1014,22 +1018,31 @@ void skip_orig_size_check(struct kmem_cache *s, const void *object)
set_orig_size(s, (void *)object, s->object_size);
}
-static void slab_bug(struct kmem_cache *s, char *fmt, ...)
+static void __slab_bug(struct kmem_cache *s, const char *fmt, va_list argsp)
{
struct va_format vaf;
va_list args;
- va_start(args, fmt);
+ va_copy(args, argsp);
vaf.fmt = fmt;
vaf.va = &args;
pr_err("=============================================================================\n");
- pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
+ pr_err("BUG %s (%s): %pV\n", s ? s->name : "<unknown>", print_tainted(), &vaf);
pr_err("-----------------------------------------------------------------------------\n\n");
va_end(args);
}
+static void slab_bug(struct kmem_cache *s, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ __slab_bug(s, fmt, args);
+ va_end(args);
+}
+
__printf(2, 3)
-static void slab_fix(struct kmem_cache *s, char *fmt, ...)
+static void slab_fix(struct kmem_cache *s, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -1082,19 +1095,19 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
/* Beginning of the filler is the free pointer */
print_section(KERN_ERR, "Padding ", p + off,
size_from_object(s) - off);
-
- dump_stack();
}
static void object_err(struct kmem_cache *s, struct slab *slab,
- u8 *object, char *reason)
+ u8 *object, const char *reason)
{
if (slab_add_kunit_errors())
return;
- slab_bug(s, "%s", reason);
+ slab_bug(s, reason);
print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+ WARN_ON(1);
}
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
@@ -1111,22 +1124,30 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
return false;
}
+static void __slab_err(struct slab *slab)
+{
+ if (slab_in_kunit_test())
+ return;
+
+ print_slab_info(slab);
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+ WARN_ON(1);
+}
+
static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
const char *fmt, ...)
{
va_list args;
- char buf[100];
if (slab_add_kunit_errors())
return;
va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), fmt, args);
+ __slab_bug(s, fmt, args);
va_end(args);
- slab_bug(s, "%s", buf);
- print_slab_info(slab);
- dump_stack();
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+ __slab_err(slab);
}
static void init_object(struct kmem_cache *s, void *object, u8 val)
@@ -1135,7 +1156,13 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
unsigned int poison_size = s->object_size;
if (s->flags & SLAB_RED_ZONE) {
- memset(p - s->red_left_pad, val, s->red_left_pad);
+ /*
+ * Here and below, avoid overwriting the KMSAN shadow. Keeping
+ * the shadow makes it possible to distinguish uninit-value
+ * from use-after-free.
+ */
+ memset_no_sanitize_memory(p - s->red_left_pad, val,
+ s->red_left_pad);
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
/*
@@ -1148,24 +1175,32 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
}
if (s->flags & __OBJECT_POISON) {
- memset(p, POISON_FREE, poison_size - 1);
- p[poison_size - 1] = POISON_END;
+ memset_no_sanitize_memory(p, POISON_FREE, poison_size - 1);
+ memset_no_sanitize_memory(p + poison_size - 1, POISON_END, 1);
}
if (s->flags & SLAB_RED_ZONE)
- memset(p + poison_size, val, s->inuse - poison_size);
+ memset_no_sanitize_memory(p + poison_size, val,
+ s->inuse - poison_size);
}
-static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
+static void restore_bytes(struct kmem_cache *s, const char *message, u8 data,
void *from, void *to)
{
slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
memset(from, data, to - from);
}
-static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
- u8 *object, char *what,
- u8 *start, unsigned int value, unsigned int bytes)
+#ifdef CONFIG_KMSAN
+#define pad_check_attributes noinline __no_kmsan_checks
+#else
+#define pad_check_attributes
+#endif
+
+static pad_check_attributes int
+check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
+ u8 *object, const char *what, u8 *start, unsigned int value,
+ unsigned int bytes, bool slab_obj_print)
{
u8 *fault;
u8 *end;
@@ -1184,12 +1219,11 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
if (slab_add_kunit_errors())
goto skip_bug_print;
- slab_bug(s, "%s overwritten", what);
- pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
- fault, end - 1, fault - addr,
- fault[0], value);
- print_trailer(s, slab, object);
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ pr_err("[%s overwritten] 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
+ what, fault, end - 1, fault - addr, fault[0], value);
+
+ if (slab_obj_print)
+ object_err(s, slab, object, "Object corrupt");
skip_bug_print:
restore_bytes(s, what, value, fault, end);
@@ -1212,8 +1246,8 @@ skip_bug_print:
* Padding is extended by another word if Redzoning is enabled and
* object_size == inuse.
*
- * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
- * 0xcc (RED_ACTIVE) for objects in use.
+ * We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
+ * 0xcc (SLUB_RED_ACTIVE) for objects in use.
*
* object + s->inuse
* Meta data starts here.
@@ -1253,11 +1287,12 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
return 1;
return check_bytes_and_report(s, slab, p, "Object padding",
- p + off, POISON_INUSE, size_from_object(s) - off);
+ p + off, POISON_INUSE, size_from_object(s) - off, true);
}
/* Check the pad bytes at the end of a slab page */
-static void slab_pad_check(struct kmem_cache *s, struct slab *slab)
+static pad_check_attributes void
+slab_pad_check(struct kmem_cache *s, struct slab *slab)
{
u8 *start;
u8 *fault;
@@ -1285,9 +1320,10 @@ static void slab_pad_check(struct kmem_cache *s, struct slab *slab)
while (end > fault && end[-1] == POISON_INUSE)
end--;
- slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu",
- fault, end - 1, fault - start);
+ slab_bug(s, "Padding overwritten. 0x%p-0x%p @offset=%tu",
+ fault, end - 1, fault - start);
print_section(KERN_ERR, "Padding ", pad, remainder);
+ __slab_err(slab);
restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
}
@@ -1298,15 +1334,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
u8 *p = object;
u8 *endobject = object + s->object_size;
unsigned int orig_size, kasan_meta_size;
+ int ret = 1;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
- object - s->red_left_pad, val, s->red_left_pad))
- return 0;
+ object - s->red_left_pad, val, s->red_left_pad, ret))
+ ret = 0;
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
- endobject, val, s->inuse - s->object_size))
- return 0;
+ endobject, val, s->inuse - s->object_size, ret))
+ ret = 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
orig_size = get_orig_size(s, object);
@@ -1314,15 +1351,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (s->object_size > orig_size &&
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
- val, s->object_size - orig_size)) {
- return 0;
+ val, s->object_size - orig_size, ret)) {
+ ret = 0;
}
}
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
- check_bytes_and_report(s, slab, p, "Alignment padding",
+ if (!check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
- s->inuse - s->object_size);
+ s->inuse - s->object_size, ret))
+ ret = 0;
}
}
@@ -1337,28 +1375,26 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (kasan_meta_size < s->object_size - 1 &&
!check_bytes_and_report(s, slab, p, "Poison",
p + kasan_meta_size, POISON_FREE,
- s->object_size - kasan_meta_size - 1))
- return 0;
+ s->object_size - kasan_meta_size - 1, ret))
+ ret = 0;
if (kasan_meta_size < s->object_size &&
!check_bytes_and_report(s, slab, p, "End Poison",
- p + s->object_size - 1, POISON_END, 1))
- return 0;
+ p + s->object_size - 1, POISON_END, 1, ret))
+ ret = 0;
}
/*
* check_pad_bytes cleans up on its own.
*/
- check_pad_bytes(s, slab, p);
+ if (!check_pad_bytes(s, slab, p))
+ ret = 0;
}
- if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
- /*
- * Object and freepointer overlap. Cannot check
- * freepointer while object is allocated.
- */
- return 1;
-
- /* Check free pointer validity */
- if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
+ /*
+ * Cannot check freepointer while object is allocated if
+ * object and freepointer overlap.
+ */
+ if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) &&
+ !check_valid_pointer(s, slab, get_freepointer(s, p))) {
object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
@@ -1366,9 +1402,10 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
* another error because the object count is now wrong.
*/
set_freepointer(s, p, NULL);
- return 0;
+ ret = 0;
}
- return 1;
+
+ return ret;
}
static int check_slab(struct kmem_cache *s, struct slab *slab)
@@ -1391,6 +1428,11 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
slab->inuse, slab->objects);
return 0;
}
+ if (slab->frozen) {
+ slab_err(s, slab, "Slab disabled since SLUB metadata consistency check failed");
+ return 0;
+ }
+
/* Slab_pad_check fixes things up after itself */
slab_pad_check(s, slab);
return 1;
@@ -1400,7 +1442,7 @@ static int check_slab(struct kmem_cache *s, struct slab *slab)
* Determine if a certain object in a slab is on the freelist. Must hold the
* slab lock to guarantee that the chains are in a consistent state.
*/
-static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
+static bool on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
{
int nr = 0;
void *fp;
@@ -1410,26 +1452,34 @@ static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
fp = slab->freelist;
while (fp && nr <= slab->objects) {
if (fp == search)
- return 1;
+ return true;
if (!check_valid_pointer(s, slab, fp)) {
if (object) {
object_err(s, slab, object,
"Freechain corrupt");
set_freepointer(s, object, NULL);
+ break;
} else {
slab_err(s, slab, "Freepointer corrupt");
slab->freelist = NULL;
slab->inuse = slab->objects;
slab_fix(s, "Freelist cleared");
- return 0;
+ return false;
}
- break;
}
object = fp;
fp = get_freepointer(s, object);
nr++;
}
+ if (nr > slab->objects) {
+ slab_err(s, slab, "Freelist cycle detected");
+ slab->freelist = NULL;
+ slab->inuse = slab->objects;
+ slab_fix(s, "Freelist cleared");
+ return false;
+ }
+
max_objects = order_objects(slab_order(slab), s->size);
if (max_objects > MAX_OBJS_PER_PAGE)
max_objects = MAX_OBJS_PER_PAGE;
@@ -1571,6 +1621,7 @@ bad:
slab_fix(s, "Marking all objects used");
slab->inuse = slab->objects;
slab->freelist = NULL;
+ slab->frozen = 1; /* mark consistency-failed slab as frozen */
}
return false;
}
@@ -1596,12 +1647,12 @@ static inline int free_consistency_checks(struct kmem_cache *s,
slab_err(s, slab, "Attempt to free object(0x%p) outside of slab",
object);
} else if (!slab->slab_cache) {
- pr_err("SLUB <none>: no slab for object 0x%p.\n",
- object);
- dump_stack();
- } else
+ slab_err(NULL, slab, "No slab cache for object 0x%p",
+ object);
+ } else {
object_err(s, slab, object,
- "page slab pointer corrupt.");
+ "page slab pointer corrupt.");
+ }
return 0;
}
return 1;
@@ -1855,7 +1906,6 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
int objects) {}
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
-
#ifndef CONFIG_SLUB_TINY
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
@@ -1865,186 +1915,357 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
#endif
#endif /* CONFIG_SLUB_DEBUG */
-static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
+#ifdef CONFIG_SLAB_OBJ_EXT
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+
+static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
{
- return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
- NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
+ struct slabobj_ext *slab_exts;
+ struct slab *obj_exts_slab;
+
+ obj_exts_slab = virt_to_slab(obj_exts);
+ slab_exts = slab_obj_exts(obj_exts_slab);
+ if (slab_exts) {
+ unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
+ obj_exts_slab, obj_exts);
+ /* codetag should be NULL */
+ WARN_ON(slab_exts[offs].ref.ct);
+ set_codetag_empty(&slab_exts[offs].ref);
+ }
}
-#ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_free_slab_cgroups(struct slab *slab)
+static inline void mark_failed_objexts_alloc(struct slab *slab)
{
- kfree(slab_objcgs(slab));
- slab->memcg_data = 0;
+ slab->obj_exts = OBJEXTS_ALLOC_FAIL;
}
-static inline size_t obj_full_size(struct kmem_cache *s)
+static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+ struct slabobj_ext *vec, unsigned int objects)
{
/*
- * For each accounted object there is an extra space which is used
- * to store obj_cgroup membership. Charge it too.
+ * If vector previously failed to allocate then we have live
+ * objects with no tag reference. Mark all references in this
+ * vector as empty to avoid warnings later on.
*/
- return s->size + sizeof(struct obj_cgroup *);
+ if (obj_exts & OBJEXTS_ALLOC_FAIL) {
+ unsigned int i;
+
+ for (i = 0; i < objects; i++)
+ set_codetag_empty(&vec[i].ref);
+ }
}
+#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
+static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {}
+static inline void mark_failed_objexts_alloc(struct slab *slab) {}
+static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+ struct slabobj_ext *vec, unsigned int objects) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
/*
- * Returns false if the allocation should fail.
+ * The allocated objcg pointers array is not accounted directly.
+ * Moreover, it should not come from DMA buffer and is not readily
+ * reclaimable. So those GFP bits should be masked off.
*/
-static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t objects, gfp_t flags)
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
+ __GFP_ACCOUNT | __GFP_NOFAIL)
+
+static inline void init_slab_obj_exts(struct slab *slab)
{
- /*
- * The obtained objcg pointer is safe to use within the current scope,
- * defined by current task or set_active_memcg() pair.
- * obj_cgroup_get() is used to get a permanent reference.
- */
- struct obj_cgroup *objcg = current_obj_cgroup();
- if (!objcg)
- return true;
+ slab->obj_exts = 0;
+}
- if (lru) {
- int ret;
- struct mem_cgroup *memcg;
+int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
+{
+ unsigned int objects = objs_per_slab(s, slab);
+ unsigned long new_exts;
+ unsigned long old_exts;
+ struct slabobj_ext *vec;
- memcg = get_mem_cgroup_from_objcg(objcg);
- ret = memcg_list_lru_alloc(memcg, lru, flags);
- css_put(&memcg->css);
+ gfp &= ~OBJCGS_CLEAR_MASK;
+ /* Prevent recursive extension vector allocation */
+ gfp |= __GFP_NO_OBJ_EXT;
+ vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp,
+ slab_nid(slab));
+ if (!vec) {
+ /* Mark vectors which failed to allocate */
+ if (new_slab)
+ mark_failed_objexts_alloc(slab);
- if (ret)
- return false;
+ return -ENOMEM;
}
- if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
- return false;
+ new_exts = (unsigned long)vec;
+#ifdef CONFIG_MEMCG
+ new_exts |= MEMCG_DATA_OBJEXTS;
+#endif
+ old_exts = READ_ONCE(slab->obj_exts);
+ handle_failed_objexts_alloc(old_exts, vec, objects);
+ if (new_slab) {
+ /*
+ * If the slab is brand new and nobody can yet access its
+ * obj_exts, no synchronization is required and obj_exts can
+ * be simply assigned.
+ */
+ slab->obj_exts = new_exts;
+ } else if ((old_exts & ~OBJEXTS_FLAGS_MASK) ||
+ cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
+ /*
+ * If the slab is already in use, somebody can allocate and
+ * assign slabobj_exts in parallel. In this case the existing
+ * objcg vector should be reused.
+ */
+ mark_objexts_empty(vec);
+ kfree(vec);
+ return 0;
+ }
- *objcgp = objcg;
- return true;
+ kmemleak_not_leak(vec);
+ return 0;
}
-/*
- * Returns false if the allocation should fail.
- */
-static __fastpath_inline
-bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
- struct obj_cgroup **objcgp, size_t objects,
- gfp_t flags)
+static inline void free_slab_obj_exts(struct slab *slab)
{
- if (!memcg_kmem_online())
- return true;
+ struct slabobj_ext *obj_exts;
- if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
- return true;
+ obj_exts = slab_obj_exts(slab);
+ if (!obj_exts)
+ return;
+
+ /*
+ * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
+ * corresponding extension will be NULL. alloc_tag_sub() will throw a
+ * warning if slab has extensions but the extension of an object is
+ * NULL, therefore replace NULL with CODETAG_EMPTY to indicate that
+ * the extension for obj_exts is expected to be NULL.
+ */
+ mark_objexts_empty(obj_exts);
+ kfree(obj_exts);
+ slab->obj_exts = 0;
+}
- return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
- flags));
+#else /* CONFIG_SLAB_OBJ_EXT */
+
+static inline void init_slab_obj_exts(struct slab *slab)
+{
}
-static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct obj_cgroup *objcg,
- gfp_t flags, size_t size,
- void **p)
+static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
+{
+ return 0;
+}
+
+static inline void free_slab_obj_exts(struct slab *slab)
+{
+}
+
+#endif /* CONFIG_SLAB_OBJ_EXT */
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+static inline struct slabobj_ext *
+prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
{
struct slab *slab;
- unsigned long off;
- size_t i;
- flags &= gfp_allowed_mask;
+ if (!p)
+ return NULL;
- for (i = 0; i < size; i++) {
- if (likely(p[i])) {
- slab = virt_to_slab(p[i]);
+ if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
+ return NULL;
- if (!slab_objcgs(slab) &&
- memcg_alloc_slab_cgroups(slab, s, flags, false)) {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- continue;
- }
+ if (flags & __GFP_NO_OBJ_EXT)
+ return NULL;
- off = obj_to_index(s, slab, p[i]);
- obj_cgroup_get(objcg);
- slab_objcgs(slab)[off] = objcg;
- mod_objcg_state(objcg, slab_pgdat(slab),
- cache_vmstat_idx(s), obj_full_size(s));
- } else {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- }
+ slab = virt_to_slab(p);
+ if (!slab_obj_exts(slab) &&
+ alloc_slab_obj_exts(slab, s, flags, false)) {
+ pr_warn_once("%s, %s: Failed to create slab extension vector!\n",
+ __func__, s->name);
+ return NULL;
}
+
+ return slab_obj_exts(slab) + obj_to_index(s, slab, p);
}
-static __fastpath_inline
-void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
- gfp_t flags, size_t size, void **p)
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline void
+__alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
{
- if (likely(!memcg_kmem_online() || !objcg))
+ struct slabobj_ext *obj_exts;
+
+ obj_exts = prepare_slab_obj_exts_hook(s, flags, object);
+ /*
+ * Currently obj_exts is used only for allocation profiling.
+ * If other users appear then mem_alloc_profiling_enabled()
+ * check should be added before alloc_tag_add().
+ */
+ if (likely(obj_exts))
+ alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
+}
+
+static inline void
+alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
+{
+ if (mem_alloc_profiling_enabled())
+ __alloc_tagging_slab_alloc_hook(s, object, flags);
+}
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline void
+__alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+ int objects)
+{
+ struct slabobj_ext *obj_exts;
+ int i;
+
+ /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */
+ if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
return;
- return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+ obj_exts = slab_obj_exts(slab);
+ if (!obj_exts)
+ return;
+
+ for (i = 0; i < objects; i++) {
+ unsigned int off = obj_to_index(s, slab, p[i]);
+
+ alloc_tag_sub(&obj_exts[off].ref, s->size);
+ }
}
-static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
- void **p, int objects,
- struct obj_cgroup **objcgs)
+static inline void
+alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+ int objects)
{
- for (int i = 0; i < objects; i++) {
- struct obj_cgroup *objcg;
- unsigned int off;
+ if (mem_alloc_profiling_enabled())
+ __alloc_tagging_slab_free_hook(s, slab, p, objects);
+}
- off = obj_to_index(s, slab, p[i]);
- objcg = objcgs[off];
- if (!objcg)
- continue;
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+static inline void
+alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
+{
+}
+
+static inline void
+alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+ int objects)
+{
+}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
+
+#ifdef CONFIG_MEMCG
+
+static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
+
+static __fastpath_inline
+bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t flags, size_t size, void **p)
+{
+ if (likely(!memcg_kmem_online()))
+ return true;
+
+ if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
+ return true;
+
+ if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
+ return true;
- objcgs[off] = NULL;
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
- -obj_full_size(s));
- obj_cgroup_put(objcg);
+ if (likely(size == 1)) {
+ memcg_alloc_abort_single(s, *p);
+ *p = NULL;
+ } else {
+ kmem_cache_free_bulk(s, size, p);
}
+
+ return false;
}
static __fastpath_inline
void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
int objects)
{
- struct obj_cgroup **objcgs;
+ struct slabobj_ext *obj_exts;
if (!memcg_kmem_online())
return;
- objcgs = slab_objcgs(slab);
- if (likely(!objcgs))
+ obj_exts = slab_obj_exts(slab);
+ if (likely(!obj_exts))
return;
- __memcg_slab_free_hook(s, slab, p, objects, objcgs);
+ __memcg_slab_free_hook(s, slab, p, objects, obj_exts);
}
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
- struct obj_cgroup *objcg)
-{
- if (objcg)
- obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
-}
-#else /* CONFIG_MEMCG_KMEM */
-static inline void memcg_free_slab_cgroups(struct slab *slab)
+static __fastpath_inline
+bool memcg_slab_post_charge(void *p, gfp_t flags)
{
-}
+ struct slabobj_ext *slab_exts;
+ struct kmem_cache *s;
+ struct folio *folio;
+ struct slab *slab;
+ unsigned long off;
-static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t objects, gfp_t flags)
-{
- return true;
+ folio = virt_to_folio(p);
+ if (!folio_test_slab(folio)) {
+ int size;
+
+ if (folio_memcg_kmem(folio))
+ return true;
+
+ if (__memcg_kmem_charge_page(folio_page(folio, 0), flags,
+ folio_order(folio)))
+ return false;
+
+ /*
+ * This folio has already been accounted in the global stats but
+ * not in the memcg stats. So, subtract from the global and use
+ * the interface which adds to both global and memcg stats.
+ */
+ size = folio_size(folio);
+ node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size);
+ lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size);
+ return true;
+ }
+
+ slab = folio_slab(folio);
+ s = slab->slab_cache;
+
+ /*
+ * Ignore KMALLOC_NORMAL cache to avoid possible circular dependency
+ * of slab_obj_exts being allocated from the same slab and thus the slab
+ * becoming effectively unfreeable.
+ */
+ if (is_kmalloc_normal(s))
+ return true;
+
+ /* Ignore already charged objects. */
+ slab_exts = slab_obj_exts(slab);
+ if (slab_exts) {
+ off = obj_to_index(s, slab, p);
+ if (unlikely(slab_exts[off].objcg))
+ return true;
+ }
+
+ return __memcg_slab_post_alloc_hook(s, NULL, flags, 1, &p);
}
-static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct obj_cgroup *objcg,
+#else /* CONFIG_MEMCG */
+static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
gfp_t flags, size_t size,
void **p)
{
+ return true;
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
@@ -2052,23 +2273,36 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
{
}
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
- struct obj_cgroup *objcg)
+static inline bool memcg_slab_post_charge(void *p, gfp_t flags)
{
+ return true;
}
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
+
+#ifdef CONFIG_SLUB_RCU_DEBUG
+static void slab_free_after_rcu_debug(struct rcu_head *rcu_head);
+
+struct rcu_delayed_free {
+ struct rcu_head head;
+ void *object;
+};
+#endif
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
*
* Returns true if freeing of the object can proceed, false if its reuse
- * was delayed by KASAN quarantine, or it was returned to KFENCE.
+ * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
+ * to KFENCE.
*/
static __always_inline
-bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
+bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
+ bool after_rcu_delay)
{
+ /* Are the object contents still accessible? */
+ bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay;
+
kmemleak_free_recursive(x, s->flags);
kmsan_slab_free(s, x);
@@ -2078,7 +2312,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
debug_check_no_obj_freed(x, s->object_size);
/* Use KCSAN to help debug racy use-after-free. */
- if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
+ if (!still_accessible)
__kcsan_check_access(x, s->object_size,
KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
@@ -2086,29 +2320,70 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
return false;
/*
+ * Give KASAN a chance to notice an invalid free operation before we
+ * modify the object.
+ */
+ if (kasan_slab_pre_free(s, x))
+ return false;
+
+#ifdef CONFIG_SLUB_RCU_DEBUG
+ if (still_accessible) {
+ struct rcu_delayed_free *delayed_free;
+
+ delayed_free = kmalloc(sizeof(*delayed_free), GFP_NOWAIT);
+ if (delayed_free) {
+ /*
+ * Let KASAN track our call stack as a "related work
+ * creation", just like if the object had been freed
+ * normally via kfree_rcu().
+ * We have to do this manually because the rcu_head is
+ * not located inside the object.
+ */
+ kasan_record_aux_stack(x);
+
+ delayed_free->object = x;
+ call_rcu(&delayed_free->head, slab_free_after_rcu_debug);
+ return false;
+ }
+ }
+#endif /* CONFIG_SLUB_RCU_DEBUG */
+
+ /*
* As memory initialization might be integrated into KASAN,
* kasan_slab_free and initialization memset's must be
* kept together to avoid discrepancies in behavior.
*
* The initialization memset's clear the object and the metadata,
* but don't touch the SLAB redzone.
+ *
+ * The object's freepointer is also avoided if stored outside the
+ * object.
*/
if (unlikely(init)) {
int rsize;
+ unsigned int inuse, orig_size;
+ inuse = get_info_end(s);
+ orig_size = get_orig_size(s, x);
if (!kasan_has_integrated_init())
- memset(kasan_reset_tag(x), 0, s->object_size);
+ memset(kasan_reset_tag(x), 0, orig_size);
rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
- memset((char *)kasan_reset_tag(x) + s->inuse, 0,
- s->size - s->inuse - rsize);
+ memset((char *)kasan_reset_tag(x) + inuse, 0,
+ s->size - inuse - rsize);
+ /*
+ * Restore orig_size, otherwize kmalloc redzone overwritten
+ * would be reported
+ */
+ set_orig_size(s, x, orig_size);
+
}
/* KASAN might put x into memory quarantine, delaying its reuse. */
- return !kasan_slab_free(s, x, init);
+ return !kasan_slab_free(s, x, init, still_accessible);
}
-static inline bool slab_free_freelist_hook(struct kmem_cache *s,
- void **head, void **tail,
- int *cnt)
+static __fastpath_inline
+bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
+ int *cnt)
{
void *object;
@@ -2117,7 +2392,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
bool init;
if (is_kfence_address(next)) {
- slab_free_hook(s, next, false);
+ slab_free_hook(s, next, false, false);
return false;
}
@@ -2132,7 +2407,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
next = get_freepointer(s, object);
/* If object's reuse doesn't have to be delayed */
- if (likely(slab_free_hook(s, object, init))) {
+ if (likely(slab_free_hook(s, object, init, false))) {
/* Move object to the new freelist */
set_freepointer(s, object, *head);
*head = object;
@@ -2172,14 +2447,16 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
struct slab *slab;
unsigned int order = oo_order(oo);
- folio = (struct folio *)alloc_pages_node(node, flags, order);
+ if (node == NUMA_NO_NODE)
+ folio = (struct folio *)alloc_frozen_pages(flags, order);
+ else
+ folio = (struct folio *)__alloc_frozen_pages(flags, order, node, NULL);
+
if (!folio)
return NULL;
slab = folio_slab(folio);
__folio_set_slab(folio);
- /* Make the flag visible before any changes to folio->mapping */
- smp_wmb();
if (folio_is_pfmemalloc(folio))
slab_set_pfmemalloc(slab);
@@ -2298,7 +2575,7 @@ static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
- memcg_alloc_slab_cgroups(slab, s, gfp, true);
+ alloc_slab_obj_exts(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
@@ -2307,8 +2584,12 @@ static __always_inline void account_slab(struct slab *slab, int order,
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
- if (memcg_kmem_online())
- memcg_free_slab_cgroups(slab);
+ /*
+ * The slab object extensions should now be freed regardless of
+ * whether mem_alloc_profiling_enabled() or not because profiling
+ * might have been disabled after slab->obj_exts got allocated.
+ */
+ free_slab_obj_exts(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
@@ -2352,6 +2633,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
slab->objects = oo_objects(oo);
slab->inuse = 0;
slab->frozen = 0;
+ init_slab_obj_exts(slab);
account_slab(slab, oo_order(oo), s, flags);
@@ -2400,12 +2682,10 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab)
__slab_clear_pfmemalloc(slab);
folio->mapping = NULL;
- /* Make the mapping reset visible before clearing the flag */
- smp_wmb();
__folio_clear_slab(folio);
mm_account_reclaimed_pages(pages);
unaccount_slab(slab, order, s);
- __free_pages(&folio->page, order);
+ free_frozen_pages(&folio->page, order);
}
static void rcu_free_slab(struct rcu_head *h)
@@ -2443,7 +2723,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
*/
static inline bool slab_test_node_partial(const struct slab *slab)
{
- return folio_test_workingset((struct folio *)slab_folio(slab));
+ return folio_test_workingset(slab_folio(slab));
}
static inline void slab_set_node_partial(struct slab *slab)
@@ -2504,7 +2784,8 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
slab->inuse++;
if (!alloc_debug_processing(s, slab, object, orig_size)) {
- remove_partial(n, slab);
+ if (folio_test_slab(slab_folio(slab)))
+ remove_partial(n, slab);
return NULL;
}
@@ -2604,19 +2885,18 @@ static struct slab *get_partial_node(struct kmem_cache *s,
if (!partial) {
partial = slab;
stat(s, ALLOC_FROM_PARTIAL);
+
+ if ((slub_get_cpu_partial(s) == 0)) {
+ break;
+ }
} else {
put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
- partial_slabs++;
- }
-#ifdef CONFIG_SLUB_CPU_PARTIAL
- if (!kmem_cache_has_cpu_partial(s)
- || partial_slabs > s->cpu_partial_slabs / 2)
- break;
-#else
- break;
-#endif
+ if (++partial_slabs > slub_get_cpu_partial(s) / 2) {
+ break;
+ }
+ }
}
spin_unlock_irqrestore(&n->list_lock, flags);
return partial;
@@ -2699,7 +2979,7 @@ static struct slab *get_partial(struct kmem_cache *s, int node,
searchnode = numa_mem_id();
slab = get_partial_node(s, get_node(s, searchnode), pc);
- if (slab || node != NUMA_NO_NODE)
+ if (slab || (node != NUMA_NO_NODE && (pc->flags & __GFP_THISNODE)))
return slab;
return get_any_partial(s, pc);
@@ -2797,7 +3077,7 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
struct slab new;
struct slab old;
- if (slab->freelist) {
+ if (READ_ONCE(slab->freelist)) {
stat(s, DEACTIVATE_REMOTE_FREES);
tail = DEACTIVATE_TO_TAIL;
}
@@ -3229,19 +3509,57 @@ static unsigned long count_partial(struct kmem_cache_node *n,
#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
#ifdef CONFIG_SLUB_DEBUG
+#define MAX_PARTIAL_TO_SCAN 10000
+
+static unsigned long count_partial_free_approx(struct kmem_cache_node *n)
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct slab *slab;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ if (n->nr_partial <= MAX_PARTIAL_TO_SCAN) {
+ list_for_each_entry(slab, &n->partial, slab_list)
+ x += slab->objects - slab->inuse;
+ } else {
+ /*
+ * For a long list, approximate the total count of objects in
+ * it to meet the limit on the number of slabs to scan.
+ * Scan from both the list's head and tail for better accuracy.
+ */
+ unsigned long scanned = 0;
+
+ list_for_each_entry(slab, &n->partial, slab_list) {
+ x += slab->objects - slab->inuse;
+ if (++scanned == MAX_PARTIAL_TO_SCAN / 2)
+ break;
+ }
+ list_for_each_entry_reverse(slab, &n->partial, slab_list) {
+ x += slab->objects - slab->inuse;
+ if (++scanned == MAX_PARTIAL_TO_SCAN)
+ break;
+ }
+ x = mult_frac(x, n->nr_partial, scanned);
+ x = min(x, node_nr_objs(n));
+ }
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
+ int cpu = raw_smp_processor_id();
int node;
struct kmem_cache_node *n;
if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
return;
- pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
- nid, gfpflags, &gfpflags);
+ pr_warn("SLUB: Unable to allocate memory on CPU %u (of node %d) on node %d, gfp=%#x(%pGg)\n",
+ cpu, cpu_to_node(cpu), nid, gfpflags, &gfpflags);
pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
s->name, s->object_size, s->size, oo_order(s->oo),
oo_order(s->min));
@@ -3255,7 +3573,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
unsigned long nr_objs;
unsigned long nr_free;
- nr_free = count_partial(n, count_free);
+ nr_free = count_partial_free_approx(n);
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);
@@ -3375,6 +3693,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
struct slab *slab;
unsigned long flags;
struct partial_context pc;
+ bool try_thisnode = true;
stat(s, ALLOC_SLOWPATH);
@@ -3501,6 +3820,21 @@ new_slab:
new_objects:
pc.flags = gfpflags;
+ /*
+ * When a preferred node is indicated but no __GFP_THISNODE
+ *
+ * 1) try to get a partial slab from target node only by having
+ * __GFP_THISNODE in pc.flags for get_partial()
+ * 2) if 1) failed, try to allocate a new slab from target node with
+ * GPF_NOWAIT | __GFP_THISNODE opportunistically
+ * 3) if 2) failed, retry with original gfpflags which will allow
+ * get_partial() try partial lists of other nodes before potentially
+ * allocating new page from other nodes
+ */
+ if (unlikely(node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
+ && try_thisnode))
+ pc.flags = GFP_NOWAIT | __GFP_THISNODE;
+
pc.orig_size = orig_size;
slab = get_partial(s, node, &pc);
if (slab) {
@@ -3522,10 +3856,15 @@ new_objects:
}
slub_put_cpu_ptr(s->cpu_slab);
- slab = new_slab(s, gfpflags, node);
+ slab = new_slab(s, pc.flags, node);
c = slub_get_cpu_ptr(s->cpu_slab);
if (unlikely(!slab)) {
+ if (node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
+ && try_thisnode) {
+ try_thisnode = false;
+ goto new_objects;
+ }
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
@@ -3658,6 +3997,28 @@ redo:
object = c->freelist;
slab = c->slab;
+#ifdef CONFIG_NUMA
+ if (static_branch_unlikely(&strict_numa) &&
+ node == NUMA_NO_NODE) {
+
+ struct mempolicy *mpol = current->mempolicy;
+
+ if (mpol) {
+ /*
+ * Special BIND rule support. If existing slab
+ * is in permitted set then do not redirect
+ * to a particular node.
+ * Otherwise we apply the memory policy to get
+ * the node we need to allocate on.
+ */
+ if (mpol->mode != MPOL_BIND || !slab ||
+ !node_isset(slab_nid(slab), mpol->nodes))
+
+ node = mempolicy_slab_node();
+ }
+ }
+#endif
+
if (!USE_LOCKLESS_FAST_PATH() ||
unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
@@ -3718,28 +4079,20 @@ static void *__slab_alloc_node(struct kmem_cache *s,
/*
* If the object has been wiped upon free, make sure it's fully initialized by
* zeroing out freelist pointer.
+ *
+ * Note that we also wipe custom freelist pointers.
*/
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
- if (unlikely(slab_want_init_on_free(s)) && obj)
+ if (unlikely(slab_want_init_on_free(s)) && obj &&
+ !freeptr_outside_object(s))
memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
0, sizeof(void *));
}
-noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
-{
- if (__should_failslab(s, gfpflags))
- return -ENOMEM;
- return 0;
-}
-ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
-
static __fastpath_inline
-struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t size, gfp_t flags)
+struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
flags &= gfp_allowed_mask;
@@ -3748,14 +4101,11 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
if (unlikely(should_failslab(s, flags)))
return NULL;
- if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
- return NULL;
-
return s;
}
static __fastpath_inline
-void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
+bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
gfp_t flags, size_t size, void **p, bool init,
unsigned int orig_size)
{
@@ -3802,9 +4152,10 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
+ alloc_tagging_slab_alloc_hook(s, p[i], flags);
}
- memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+ return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
}
/*
@@ -3821,10 +4172,9 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
- struct obj_cgroup *objcg = NULL;
bool init = false;
- s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
+ s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
return NULL;
@@ -3841,13 +4191,15 @@ out:
/*
* When init equals 'true', like for kzalloc() family, only
* @orig_size bytes might be zeroed instead of s->object_size
+ * In case this fails due to memcg_slab_post_alloc_hook(),
+ * object is set to NULL
*/
- slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
+ slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
return object;
}
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+void *kmem_cache_alloc_noprof(struct kmem_cache *s, gfp_t gfpflags)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
s->object_size);
@@ -3856,9 +4208,9 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_noprof);
-void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
{
void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
@@ -3868,7 +4220,16 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_lru);
+EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
+
+bool kmem_cache_charge(void *objp, gfp_t gfpflags)
+{
+ if (!memcg_kmem_online())
+ return true;
+
+ return memcg_slab_post_charge(objp, gfpflags);
+}
+EXPORT_SYMBOL(kmem_cache_charge);
/**
* kmem_cache_alloc_node - Allocate an object on the specified node
@@ -3883,7 +4244,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
*
* Return: pointer to the new object or %NULL in case of error
*/
-void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
+void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
@@ -3891,14 +4252,14 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
/*
* To avoid unnecessary overhead, we pass through large allocation requests
* directly to the page allocator. We use __GFP_COMP, because we will need to
* know the allocation order to free the pages properly in kfree.
*/
-static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
+static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct folio *folio;
void *ptr = NULL;
@@ -3908,11 +4269,12 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
- folio = (struct folio *)alloc_pages_node(node, flags, order);
+ folio = (struct folio *)alloc_pages_node_noprof(node, flags, order);
if (folio) {
ptr = folio_address(folio);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
PAGE_SIZE << order);
+ __folio_set_large_kmalloc(folio);
}
ptr = kasan_kmalloc_large(ptr, size, flags);
@@ -3923,35 +4285,35 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
return ptr;
}
-void *kmalloc_large(size_t size, gfp_t flags)
+void *__kmalloc_large_noprof(size_t size, gfp_t flags)
{
- void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
+ void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, NUMA_NO_NODE);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large);
+EXPORT_SYMBOL(__kmalloc_large_noprof);
-void *kmalloc_large_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
{
- void *ret = __kmalloc_large_node(size, flags, node);
+ void *ret = ___kmalloc_large_node(size, flags, node);
trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size),
flags, node);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large_node);
+EXPORT_SYMBOL(__kmalloc_large_node_noprof);
static __always_inline
-void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
+void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node,
unsigned long caller)
{
struct kmem_cache *s;
void *ret;
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
- ret = __kmalloc_large_node(size, flags, node);
+ ret = __kmalloc_large_node_noprof(size, flags, node);
trace_kmalloc(caller, ret, size,
PAGE_SIZE << get_order(size), flags, node);
return ret;
@@ -3960,34 +4322,34 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
if (unlikely(!size))
return ZERO_SIZE_PTR;
- s = kmalloc_slab(size, flags, caller);
+ s = kmalloc_slab(size, b, flags, caller);
ret = slab_alloc_node(s, NULL, flags, node, caller, size);
ret = kasan_kmalloc(s, ret, size, flags);
trace_kmalloc(caller, ret, size, s->size, flags, node);
return ret;
}
-
-void *__kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
{
- return __do_kmalloc_node(size, flags, node, _RET_IP_);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_);
}
-EXPORT_SYMBOL(__kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node_noprof);
-void *__kmalloc(size_t size, gfp_t flags)
+void *__kmalloc_noprof(size_t size, gfp_t flags)
{
- return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
+ return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_);
}
-EXPORT_SYMBOL(__kmalloc);
+EXPORT_SYMBOL(__kmalloc_noprof);
-void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
- int node, unsigned long caller)
+void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags,
+ int node, unsigned long caller)
{
- return __do_kmalloc_node(size, flags, node, caller);
+ return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller);
+
}
-EXPORT_SYMBOL(__kmalloc_node_track_caller);
+EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof);
-void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
@@ -3997,10 +4359,10 @@ void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_trace);
+EXPORT_SYMBOL(__kmalloc_cache_noprof);
-void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size)
+void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
+ int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
@@ -4009,7 +4371,7 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_node_trace);
+EXPORT_SYMBOL(__kmalloc_cache_node_noprof);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
@@ -4226,7 +4588,7 @@ redo:
c = raw_cpu_ptr(s->cpu_slab);
tid = READ_ONCE(c->tid);
- /* Same with comment on barrier() in slab_alloc_node() */
+ /* Same with comment on barrier() in __slab_alloc_node() */
barrier();
if (unlikely(slab != c->slab)) {
@@ -4276,16 +4638,28 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
unsigned long addr)
{
memcg_slab_free_hook(s, slab, &object, 1);
+ alloc_tagging_slab_free_hook(s, slab, &object, 1);
- if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+ if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
do_slab_free(s, slab, object, object, 1, addr);
}
+#ifdef CONFIG_MEMCG
+/* Do not inline the rare memcg charging failed path into the allocation path */
+static noinline
+void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
+{
+ if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
+ do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
+}
+#endif
+
static __fastpath_inline
void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
void *tail, void **p, int cnt, unsigned long addr)
{
memcg_slab_free_hook(s, slab, p, cnt);
+ alloc_tagging_slab_free_hook(s, slab, p, cnt);
/*
* With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed.
@@ -4294,6 +4668,33 @@ void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
do_slab_free(s, slab, head, tail, cnt, addr);
}
+#ifdef CONFIG_SLUB_RCU_DEBUG
+static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
+{
+ struct rcu_delayed_free *delayed_free =
+ container_of(rcu_head, struct rcu_delayed_free, head);
+ void *object = delayed_free->object;
+ struct slab *slab = virt_to_slab(object);
+ struct kmem_cache *s;
+
+ kfree(delayed_free);
+
+ if (WARN_ON(is_kfence_address(object)))
+ return;
+
+ /* find the object and the cache again */
+ if (WARN_ON(!slab))
+ return;
+ s = slab->slab_cache;
+ if (WARN_ON(!(s->flags & SLAB_TYPESAFE_BY_RCU)))
+ return;
+
+ /* resume freeing */
+ if (slab_free_hook(s, object, slab_want_init_on_free(s), true))
+ do_slab_free(s, slab, object, object, 1, _THIS_IP_);
+}
+#endif /* CONFIG_SLUB_RCU_DEBUG */
+
#ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
@@ -4349,6 +4750,11 @@ static void free_large_kmalloc(struct folio *folio, void *object)
{
unsigned int order = folio_order(folio);
+ if (WARN_ON_ONCE(!folio_test_large_kmalloc(folio))) {
+ dump_page(&folio->page, "Not a kmalloc allocation");
+ return;
+ }
+
if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
@@ -4358,9 +4764,55 @@ static void free_large_kmalloc(struct folio *folio, void *object)
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
+ __folio_clear_large_kmalloc(folio);
folio_put(folio);
}
+/*
+ * Given an rcu_head embedded within an object obtained from kvmalloc at an
+ * offset < 4k, free the object in question.
+ */
+void kvfree_rcu_cb(struct rcu_head *head)
+{
+ void *obj = head;
+ struct folio *folio;
+ struct slab *slab;
+ struct kmem_cache *s;
+ void *slab_addr;
+
+ if (is_vmalloc_addr(obj)) {
+ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
+ vfree(obj);
+ return;
+ }
+
+ folio = virt_to_folio(obj);
+ if (!folio_test_slab(folio)) {
+ /*
+ * rcu_head offset can be only less than page size so no need to
+ * consider folio order
+ */
+ obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
+ free_large_kmalloc(folio, obj);
+ return;
+ }
+
+ slab = folio_slab(folio);
+ s = slab->slab_cache;
+ slab_addr = folio_address(folio);
+
+ if (is_kfence_address(obj)) {
+ obj = kfence_object_start(obj);
+ } else {
+ unsigned int idx = __obj_to_index(s, slab_addr, obj);
+
+ obj = slab_addr + s->size * idx;
+ obj = fixup_red_left(s, obj);
+ }
+
+ slab_free(s, slab, obj, _RET_IP_);
+}
+
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
@@ -4391,6 +4843,290 @@ void kfree(const void *object)
}
EXPORT_SYMBOL(kfree);
+static __always_inline __realloc_size(2) void *
+__do_krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+ size_t ks = 0;
+ int orig_size = 0;
+ struct kmem_cache *s = NULL;
+
+ if (unlikely(ZERO_OR_NULL_PTR(p)))
+ goto alloc_new;
+
+ /* Check for double-free. */
+ if (!kasan_check_byte(p))
+ return NULL;
+
+ if (is_kfence_address(p)) {
+ ks = orig_size = kfence_ksize(p);
+ } else {
+ struct folio *folio;
+
+ folio = virt_to_folio(p);
+ if (unlikely(!folio_test_slab(folio))) {
+ /* Big kmalloc object */
+ WARN_ON(folio_size(folio) <= KMALLOC_MAX_CACHE_SIZE);
+ WARN_ON(p != folio_address(folio));
+ ks = folio_size(folio);
+ } else {
+ s = folio_slab(folio)->slab_cache;
+ orig_size = get_orig_size(s, (void *)p);
+ ks = s->object_size;
+ }
+ }
+
+ /* If the old object doesn't fit, allocate a bigger one */
+ if (new_size > ks)
+ goto alloc_new;
+
+ /* Zero out spare memory. */
+ if (want_init_on_alloc(flags)) {
+ kasan_disable_current();
+ if (orig_size && orig_size < new_size)
+ memset(kasan_reset_tag(p) + orig_size, 0, new_size - orig_size);
+ else
+ memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
+ kasan_enable_current();
+ }
+
+ /* Setup kmalloc redzone when needed */
+ if (s && slub_debug_orig_size(s)) {
+ set_orig_size(s, (void *)p, new_size);
+ if (s->flags & SLAB_RED_ZONE && new_size < ks)
+ memset_no_sanitize_memory(kasan_reset_tag(p) + new_size,
+ SLUB_RED_ACTIVE, ks - new_size);
+ }
+
+ p = kasan_krealloc(p, new_size, flags);
+ return (void *)p;
+
+alloc_new:
+ ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
+ if (ret && p) {
+ /* Disable KASAN checks as the object's redzone is accessed. */
+ kasan_disable_current();
+ memcpy(ret, kasan_reset_tag(p), orig_size ?: ks);
+ kasan_enable_current();
+ }
+
+ return ret;
+}
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
+ * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
+ * size of an allocation (but not the exact size it was allocated with) and
+ * hence implements the following semantics for shrinking and growing buffers
+ * with __GFP_ZERO.
+ *
+ * new bucket
+ * 0 size size
+ * |--------|----------------|
+ * | keep | zero |
+ *
+ * Otherwise, the original allocation size 'orig_size' could be used to
+ * precisely clear the requested size, and the new size will also be stored
+ * as the new 'orig_size'.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+
+ if (unlikely(!new_size)) {
+ kfree(p);
+ return ZERO_SIZE_PTR;
+ }
+
+ ret = __do_krealloc(p, new_size, flags);
+ if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
+ kfree(p);
+
+ return ret;
+}
+EXPORT_SYMBOL(krealloc_noprof);
+
+static gfp_t kmalloc_gfp_adjust(gfp_t flags, size_t size)
+{
+ /*
+ * We want to attempt a large physically contiguous block first because
+ * it is less likely to fragment multiple larger blocks and therefore
+ * contribute to a long term fragmentation less than vmalloc fallback.
+ * However make sure that larger requests are not too disruptive - i.e.
+ * do not direct reclaim unless physically continuous memory is preferred
+ * (__GFP_RETRY_MAYFAIL mode). We still kick in kswapd/kcompactd to
+ * start working in the background
+ */
+ if (size > PAGE_SIZE) {
+ flags |= __GFP_NOWARN;
+
+ if (!(flags & __GFP_RETRY_MAYFAIL))
+ flags &= ~__GFP_DIRECT_RECLAIM;
+
+ /* nofail semantic is implemented by the vmalloc fallback */
+ flags &= ~__GFP_NOFAIL;
+ }
+
+ return flags;
+}
+
+/**
+ * __kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @b: which set of kmalloc buckets to allocate from.
+ * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
+ * @node: numa node to allocate from
+ *
+ * Uses kmalloc to get the memory but if the allocation fails then falls back
+ * to the vmalloc allocator. Use kvfree for freeing the memory.
+ *
+ * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier.
+ * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
+ * preferable to the vmalloc fallback, due to visible performance drawbacks.
+ *
+ * Return: pointer to the allocated memory of %NULL in case of failure
+ */
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
+{
+ void *ret;
+
+ /*
+ * It doesn't really make sense to fallback to vmalloc for sub page
+ * requests
+ */
+ ret = __do_kmalloc_node(size, PASS_BUCKET_PARAM(b),
+ kmalloc_gfp_adjust(flags, size),
+ node, _RET_IP_);
+ if (ret || size <= PAGE_SIZE)
+ return ret;
+
+ /* non-sleeping allocations are not supported by vmalloc */
+ if (!gfpflags_allow_blocking(flags))
+ return NULL;
+
+ /* Don't even allow crazy sizes */
+ if (unlikely(size > INT_MAX)) {
+ WARN_ON_ONCE(!(flags & __GFP_NOWARN));
+ return NULL;
+ }
+
+ /*
+ * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
+ * since the callers already cannot assume anything
+ * about the resulting pointer, and cannot play
+ * protection games.
+ */
+ return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
+ flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ node, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(__kvmalloc_node_noprof);
+
+/**
+ * kvfree() - Free memory.
+ * @addr: Pointer to allocated memory.
+ *
+ * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
+ * It is slightly more efficient to use kfree() or vfree() if you are certain
+ * that you know which one to use.
+ *
+ * Context: Either preemptible task context or not-NMI interrupt.
+ */
+void kvfree(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
+/**
+ * kvfree_sensitive - Free a data object containing sensitive information.
+ * @addr: address of the data object to be freed.
+ * @len: length of the data object.
+ *
+ * Use the special memzero_explicit() function to clear the content of a
+ * kvmalloc'ed object containing sensitive data to make sure that the
+ * compiler won't optimize out the data clearing.
+ */
+void kvfree_sensitive(const void *addr, size_t len)
+{
+ if (likely(!ZERO_OR_NULL_PTR(addr))) {
+ memzero_explicit((void *)addr, len);
+ kvfree(addr);
+ }
+}
+EXPORT_SYMBOL(kvfree_sensitive);
+
+/**
+ * kvrealloc - reallocate memory; contents remain unchanged
+ * @p: object to reallocate memory for
+ * @size: the size to reallocate
+ * @flags: the flags for the page level allocator
+ *
+ * If @p is %NULL, kvrealloc() behaves exactly like kvmalloc(). If @size is 0
+ * and @p is not a %NULL pointer, the object pointed to is freed.
+ *
+ * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
+ * initial memory allocation, every subsequent call to this API for the same
+ * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
+ * __GFP_ZERO is not fully honored by this API.
+ *
+ * In any case, the contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes.
+ *
+ * This function must not be called concurrently with itself or kvfree() for the
+ * same memory allocation.
+ *
+ * Return: pointer to the allocated memory or %NULL in case of error
+ */
+void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags)
+{
+ void *n;
+
+ if (is_vmalloc_addr(p))
+ return vrealloc_noprof(p, size, flags);
+
+ n = krealloc_noprof(p, size, kmalloc_gfp_adjust(flags, size));
+ if (!n) {
+ /* We failed to krealloc(), fall back to kvmalloc(). */
+ n = kvmalloc_noprof(size, flags);
+ if (!n)
+ return NULL;
+
+ if (p) {
+ /* We already know that `p` is not a vmalloc address. */
+ kasan_disable_current();
+ memcpy(n, kasan_reset_tag(p), ksize(p));
+ kasan_enable_current();
+
+ kfree(p);
+ }
+ }
+
+ return n;
+}
+EXPORT_SYMBOL(kvrealloc_noprof);
+
struct detached_freelist {
struct slab *slab;
void *tail;
@@ -4486,6 +5222,9 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
if (!df.slab)
continue;
+ if (kfence_free(df.freelist))
+ continue;
+
do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt,
_RET_IP_);
} while (likely(size));
@@ -4612,36 +5351,33 @@ error:
#endif /* CONFIG_SLUB_TINY */
/* Note that interrupts must be enabled when calling this function. */
-int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
+ void **p)
{
int i;
- struct obj_cgroup *objcg = NULL;
if (!size)
return 0;
- /* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
+ s = slab_pre_alloc_hook(s, flags);
if (unlikely(!s))
return 0;
i = __kmem_cache_alloc_bulk(s, flags, size, p);
+ if (unlikely(i == 0))
+ return 0;
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
- if (likely(i != 0)) {
- slab_post_alloc_hook(s, objcg, flags, size, p,
- slab_want_init_on_alloc(flags, s), s->object_size);
- } else {
- memcg_slab_alloc_error_hook(s, size, objcg);
+ if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
+ slab_want_init_on_alloc(flags, s), s->object_size))) {
+ return 0;
}
-
return i;
}
-EXPORT_SYMBOL(kmem_cache_alloc_bulk);
+EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
/*
@@ -4847,7 +5583,6 @@ static void early_kmem_cache_node_alloc(int node)
BUG_ON(!n);
#ifdef CONFIG_SLUB_DEBUG
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
- init_tracking(kmem_cache_node, n);
#endif
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
slab->freelist = get_freepointer(kmem_cache_node, n);
@@ -4945,7 +5680,7 @@ static void set_cpu_partial(struct kmem_cache *s)
* calculate_sizes() determines the order and the distribution of data within
* a slab object.
*/
-static int calculate_sizes(struct kmem_cache *s)
+static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
{
slab_flags_t flags = s->flags;
unsigned int size = s->object_size;
@@ -4986,10 +5721,10 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
- if (slub_debug_orig_size(s) ||
- (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
- ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
- s->ctor) {
+ if (((flags & SLAB_TYPESAFE_BY_RCU) && !args->use_freeptr_offset) ||
+ (flags & SLAB_POISON) || s->ctor ||
+ ((flags & SLAB_RED_ZONE) &&
+ (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) {
/*
* Relocate free pointer after the object if it is not
* permitted to overwrite the first word of the object on
@@ -4997,7 +5732,9 @@ static int calculate_sizes(struct kmem_cache *s)
*
* This is the case if we do RCU, have a constructor or
* destructor, are poisoning the objects, or are
- * redzoning an object smaller than sizeof(void *).
+ * redzoning an object smaller than sizeof(void *) or are
+ * redzoning an object with slub_debug_orig_size() enabled,
+ * in which case the right redzone may be extended.
*
* The assumption that s->offset >= s->inuse means free
* pointer is outside of the object is used in the
@@ -5006,6 +5743,8 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->offset = size;
size += sizeof(void *);
+ } else if ((flags & SLAB_TYPESAFE_BY_RCU) && args->use_freeptr_offset) {
+ s->offset = args->freeptr_offset;
} else {
/*
* Store freelist pointer near middle of object to keep
@@ -5060,9 +5799,7 @@ static int calculate_sizes(struct kmem_cache *s)
if ((int)order < 0)
return 0;
- s->allocflags = 0;
- if (order)
- s->allocflags |= __GFP_COMP;
+ s->allocflags = __GFP_COMP;
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= GFP_DMA;
@@ -5082,73 +5819,14 @@ static int calculate_sizes(struct kmem_cache *s)
return !!oo_objects(s->oo);
}
-static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
-{
- s->flags = kmem_cache_flags(flags, s->name);
-#ifdef CONFIG_SLAB_FREELIST_HARDENED
- s->random = get_random_long();
-#endif
-
- if (!calculate_sizes(s))
- goto error;
- if (disable_higher_order_debug) {
- /*
- * Disable debugging flags that store metadata if the min slab
- * order increased.
- */
- if (get_order(s->size) > get_order(s->object_size)) {
- s->flags &= ~DEBUG_METADATA_FLAGS;
- s->offset = 0;
- if (!calculate_sizes(s))
- goto error;
- }
- }
-
-#ifdef system_has_freelist_aba
- if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) {
- /* Enable fast mode */
- s->flags |= __CMPXCHG_DOUBLE;
- }
-#endif
-
- /*
- * The larger the object size is, the more slabs we want on the partial
- * list to avoid pounding the page allocator excessively.
- */
- s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
- s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
-
- set_cpu_partial(s);
-
-#ifdef CONFIG_NUMA
- s->remote_node_defrag_ratio = 1000;
-#endif
-
- /* Initialize the pre-computed randomized freelist if slab is up */
- if (slab_state >= UP) {
- if (init_cache_random_seq(s))
- goto error;
- }
-
- if (!init_kmem_cache_nodes(s))
- goto error;
-
- if (alloc_kmem_cache_cpus(s))
- return 0;
-
-error:
- __kmem_cache_release(s);
- return -EINVAL;
-}
-
-static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
- const char *text)
+static void list_slab_objects(struct kmem_cache *s, struct slab *slab)
{
#ifdef CONFIG_SLUB_DEBUG
void *addr = slab_address(slab);
void *p;
- slab_err(s, slab, text, s->name);
+ if (!slab_add_kunit_errors())
+ slab_bug(s, "Objects remaining on __kmem_cache_shutdown()");
spin_lock(&object_map_lock);
__fill_map(object_map, s, slab);
@@ -5156,11 +5834,15 @@ static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
for_each_object(p, s, addr, slab->objects) {
if (!test_bit(__obj_to_index(s, addr, p), object_map)) {
+ if (slab_add_kunit_errors())
+ continue;
pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
print_tracking(s, p);
}
}
spin_unlock(&object_map_lock);
+
+ __slab_err(slab);
#endif
}
@@ -5181,8 +5863,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
remove_partial(n, slab);
list_add(&slab->slab_list, &discard);
} else {
- list_slab_objects(s, slab,
- "Objects remaining in %s on __kmem_cache_shutdown()");
+ list_slab_objects(s, slab);
}
}
spin_unlock_irq(&n->list_lock);
@@ -5321,6 +6002,23 @@ static int __init setup_slub_min_objects(char *str)
__setup("slab_min_objects=", setup_slub_min_objects);
__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+ if (nr_node_ids > 1) {
+ static_branch_enable(&strict_numa);
+ pr_info("SLUB: Strict NUMA enabled.\n");
+ } else {
+ pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+ }
+
+ return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
#ifdef CONFIG_HARDENED_USERCOPY
/*
* Rejects incorrectly sized objects and objects that are to be copied
@@ -5630,7 +6328,8 @@ void __init kmem_cache_init(void)
node_set(node, slab_nodes);
create_boot_cache(kmem_cache_node, "kmem_cache_node",
- sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
+ sizeof(struct kmem_cache_node),
+ SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
@@ -5640,7 +6339,7 @@ void __init kmem_cache_init(void)
create_boot_cache(kmem_cache, "kmem_cache",
offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *),
- SLAB_HWCACHE_ALIGN, 0, 0);
+ SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
kmem_cache = bootstrap(&boot_kmem_cache);
kmem_cache_node = bootstrap(&boot_kmem_cache_node);
@@ -5678,7 +6377,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s = find_mergeable(size, align, flags, name, ctor);
if (s) {
if (sysfs_slab_alias(s, name))
- return NULL;
+ pr_err("SLUB: Unable to add cache alias %s to sysfs\n",
+ name);
s->refcount++;
@@ -5693,28 +6393,93 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
return s;
}
-int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
+int do_kmem_cache_create(struct kmem_cache *s, const char *name,
+ unsigned int size, struct kmem_cache_args *args,
+ slab_flags_t flags)
{
- int err;
+ int err = -EINVAL;
- err = kmem_cache_open(s, flags);
- if (err)
- return err;
+ s->name = name;
+ s->size = s->object_size = size;
+
+ s->flags = kmem_cache_flags(flags, s->name);
+#ifdef CONFIG_SLAB_FREELIST_HARDENED
+ s->random = get_random_long();
+#endif
+ s->align = args->align;
+ s->ctor = args->ctor;
+#ifdef CONFIG_HARDENED_USERCOPY
+ s->useroffset = args->useroffset;
+ s->usersize = args->usersize;
+#endif
+
+ if (!calculate_sizes(args, s))
+ goto out;
+ if (disable_higher_order_debug) {
+ /*
+ * Disable debugging flags that store metadata if the min slab
+ * order increased.
+ */
+ if (get_order(s->size) > get_order(s->object_size)) {
+ s->flags &= ~DEBUG_METADATA_FLAGS;
+ s->offset = 0;
+ if (!calculate_sizes(args, s))
+ goto out;
+ }
+ }
+
+#ifdef system_has_freelist_aba
+ if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) {
+ /* Enable fast mode */
+ s->flags |= __CMPXCHG_DOUBLE;
+ }
+#endif
+
+ /*
+ * The larger the object size is, the more slabs we want on the partial
+ * list to avoid pounding the page allocator excessively.
+ */
+ s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
+ s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
+
+ set_cpu_partial(s);
+
+#ifdef CONFIG_NUMA
+ s->remote_node_defrag_ratio = 1000;
+#endif
+
+ /* Initialize the pre-computed randomized freelist if slab is up */
+ if (slab_state >= UP) {
+ if (init_cache_random_seq(s))
+ goto out;
+ }
+
+ if (!init_kmem_cache_nodes(s))
+ goto out;
+
+ if (!alloc_kmem_cache_cpus(s))
+ goto out;
+
+ err = 0;
/* Mutex is not taken during early boot */
if (slab_state <= UP)
- return 0;
+ goto out;
- err = sysfs_slab_add(s);
- if (err) {
- __kmem_cache_release(s);
- return err;
- }
+ /*
+ * Failing to create sysfs files is not critical to SLUB functionality.
+ * If it fails, proceed with cache creation without these files.
+ */
+ if (sysfs_slab_add(s))
+ pr_err("SLUB: Unable to add cache %s to sysfs\n", s->name);
if (s->flags & SLAB_STORE_USER)
debugfs_slab_add(s);
- return 0;
+out:
+ if (err)
+ __kmem_cache_release(s);
+ return err;
}
#ifdef SLAB_SUPPORTS_SYSFS
@@ -6036,7 +6801,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
else if (flags & SO_OBJECTS)
WARN_ON_ONCE(1);
else
- x = slab->slabs;
+ x = data_race(slab->slabs);
total += x;
nodes[node] += x;
}
@@ -6241,7 +7006,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab)
- slabs += slab->slabs;
+ slabs += data_race(slab->slabs);
}
#endif
@@ -6255,7 +7020,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab) {
- slabs = READ_ONCE(slab->slabs);
+ slabs = data_race(slab->slabs);
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
cpu, objects, slabs);
@@ -6776,7 +7541,8 @@ out_del_kobj:
void sysfs_slab_unlink(struct kmem_cache *s)
{
- kobject_del(&s->kobj);
+ if (s->kobj.state_in_sysfs)
+ kobject_del(&s->kobj);
}
void sysfs_slab_release(struct kmem_cache *s)
@@ -6805,6 +7571,11 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
* If we have a leftover link then remove it.
*/
sysfs_remove_link(&slab_kset->kobj, name);
+ /*
+ * The original cache may have failed to generate sysfs file.
+ * In that case, sysfs_create_link() returns -ENOENT and
+ * symbolic link creation is skipped.
+ */
return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
}
@@ -6988,10 +7759,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
return -ENOMEM;
}
- if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
- alloc = TRACK_ALLOC;
- else
- alloc = TRACK_FREE;
+ alloc = debugfs_get_aux_num(filep);
if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
bitmap_free(obj_map);
@@ -7047,11 +7815,11 @@ static void debugfs_slab_add(struct kmem_cache *s)
slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
- debugfs_create_file("alloc_traces", 0400,
- slab_cache_dir, s, &slab_debugfs_fops);
+ debugfs_create_file_aux_num("alloc_traces", 0400, slab_cache_dir, s,
+ TRACK_ALLOC, &slab_debugfs_fops);
- debugfs_create_file("free_traces", 0400,
- slab_cache_dir, s, &slab_debugfs_fops);
+ debugfs_create_file_aux_num("free_traces", 0400, slab_cache_dir, s,
+ TRACK_FREE, &slab_debugfs_fops);
}
void debugfs_slab_release(struct kmem_cache *s)
@@ -7089,7 +7857,7 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
for_each_kmem_cache_node(s, node, n) {
nr_slabs += node_nr_slabs(n);
nr_objs += node_nr_objs(n);
- nr_free += count_partial(n, count_free);
+ nr_free += count_partial_free_approx(n);
}
sinfo->active_objs = nr_objs - nr_free;
@@ -7099,14 +7867,4 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
sinfo->objects_per_slab = oo_objects(s->oo);
sinfo->cache_order = oo_order(s->oo);
}
-
-void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
-{
-}
-
-ssize_t slabinfo_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- return -EIO;
-}
#endif /* CONFIG_SLUB_DEBUG */