Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig.debug            6
-rw-r--r--  mm/backing-dev.c           25
-rw-r--r--  mm/compaction.c             7
-rw-r--r--  mm/damon/core.c            15
-rw-r--r--  mm/damon/lru_sort.c        43
-rw-r--r--  mm/damon/reclaim.c         18
-rw-r--r--  mm/damon/sysfs-schemes.c    4
-rw-r--r--  mm/debug_vm_pgtable.c       8
-rw-r--r--  mm/filemap.c               60
-rw-r--r--  mm/kasan/common.c           8
-rw-r--r--  mm/kasan/generic.c         93
-rw-r--r--  mm/kasan/kasan.h           10
-rw-r--r--  mm/kasan/quarantine.c       5
-rw-r--r--  mm/memblock.c               6
-rw-r--r--  mm/memcontrol.c            10
-rw-r--r--  mm/memory.c                24
-rw-r--r--  mm/migrate.c                8
-rw-r--r--  mm/mmap.c                  10
-rw-r--r--  mm/page_alloc.c            32
-rw-r--r--  mm/shmem.c                  8
-rw-r--r--  mm/slab.h                  11
-rw-r--r--  mm/slab_common.c           29
-rw-r--r--  mm/slub.c                 116
-rw-r--r--  mm/swap.h                   5
-rw-r--r--  mm/swap_state.c            10
-rw-r--r--  mm/swapfile.c              35
-rw-r--r--  mm/userfaultfd.c            6
-rw-r--r--  mm/util.c                  17
-rw-r--r--  mm/vmalloc.c               83
-rw-r--r--  mm/vmscan.c                 5
-rw-r--r--  mm/zswap.c                 15
31 files changed, 423 insertions, 309 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 321ab379994f..afc72fde0f03 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -64,11 +64,11 @@ config SLUB_DEBUG_ON
help
Boot with debugging on by default. SLUB boots by default with
the runtime debug capabilities switched off. Enabling this is
- equivalent to specifying the "slub_debug" parameter on boot.
+ equivalent to specifying the "slab_debug" parameter on boot.
There is no support for more fine grained debug control like
- possible with slub_debug=xxx. SLUB debugging may be switched
+ possible with slab_debug=xxx. SLUB debugging may be switched
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
- "slub_debug=-".
+ "slab_debug=-".
config PAGE_OWNER
bool "Track page owner"
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e039d05304dd..5f2be8c8df11 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -372,31 +372,6 @@ static int __init default_bdi_init(void)
}
subsys_initcall(default_bdi_init);
-/*
- * This function is used when the first inode for this wb is marked dirty. It
- * wakes-up the corresponding bdi thread which should then take care of the
- * periodic background write-out of dirty inodes. Since the write-out would
- * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
- * set up a timer which wakes the bdi thread up later.
- *
- * Note, we wouldn't bother setting up the timer, but this function is on the
- * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
- * by delaying the wake-up.
- *
- * We have to be careful not to postpone flush work if it is scheduled for
- * earlier. Thus we use queue_delayed_work().
- */
-void wb_wakeup_delayed(struct bdi_writeback *wb)
-{
- unsigned long timeout;
-
- timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_irq(&wb->work_lock);
- if (test_bit(WB_registered, &wb->state))
- queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_irq(&wb->work_lock);
-}
-
static void wb_update_bandwidth_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(to_delayed_work(work),
diff --git a/mm/compaction.c b/mm/compaction.c
index 4add68d40e8d..b961db601df4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2723,16 +2723,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
enum compact_priority prio, struct page **capture)
{
- int may_perform_io = (__force int)(gfp_mask & __GFP_IO);
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- /*
- * Check if the GFP flags allow compaction - GFP_NOIO is really
- * tricky context because the migration might require IO
- */
- if (!may_perform_io)
+ if (!gfp_compaction_allowed(gfp_mask))
return COMPACT_SKIPPED;
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
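Note: gfp_compaction_allowed() itself is introduced outside this mm/ diff (in the gfp headers). Judging from the call sites in compaction.c, page_alloc.c and vmscan.c below, it presumably keeps the old __GFP_IO check and additionally covers !CONFIG_COMPACTION, roughly along the lines of this sketch:

/*
 * Assumed shape of the helper; the real definition is not part of this diff.
 * GFP_NOIO remains a tricky context for compaction because migration may
 * require IO, hence the __GFP_IO test.
 */
static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
{
	return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
}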
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 36f6f1d21ff0..5b325749fc12 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1026,6 +1026,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
damon_for_each_scheme(s, c) {
struct damos_quota *quota = &s->quota;
+ if (c->passed_sample_intervals != s->next_apply_sis)
+ continue;
+
if (!s->wmarks.activated)
continue;
@@ -1176,10 +1179,6 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
if (c->passed_sample_intervals != s->next_apply_sis)
continue;
- s->next_apply_sis +=
- (s->apply_interval_us ? s->apply_interval_us :
- c->attrs.aggr_interval) / sample_interval;
-
if (!s->wmarks.activated)
continue;
@@ -1195,6 +1194,14 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
damon_for_each_region_safe(r, next_r, t)
damon_do_apply_schemes(c, t, r);
}
+
+ damon_for_each_scheme(s, c) {
+ if (c->passed_sample_intervals != s->next_apply_sis)
+ continue;
+ s->next_apply_sis +=
+ (s->apply_interval_us ? s->apply_interval_us :
+ c->attrs.aggr_interval) / sample_interval;
+ }
}
/*
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index f2e5f9431892..3de2916a65c3 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO);
}
+static void damon_lru_sort_copy_quota_status(struct damos_quota *dst,
+ struct damos_quota *src)
+{
+ dst->total_charged_sz = src->total_charged_sz;
+ dst->total_charged_ns = src->total_charged_ns;
+ dst->charged_sz = src->charged_sz;
+ dst->charged_from = src->charged_from;
+ dst->charge_target_from = src->charge_target_from;
+ dst->charge_addr_from = src->charge_addr_from;
+}
+
static int damon_lru_sort_apply_parameters(void)
{
- struct damos *scheme;
+ struct damos *scheme, *hot_scheme, *cold_scheme;
+ struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL;
unsigned int hot_thres, cold_thres;
int err = 0;
@@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void)
if (err)
return err;
+ damon_for_each_scheme(scheme, ctx) {
+ if (!old_hot_scheme) {
+ old_hot_scheme = scheme;
+ continue;
+ }
+ old_cold_scheme = scheme;
+ }
+
hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) *
hot_thres_access_freq / 1000;
- scheme = damon_lru_sort_new_hot_scheme(hot_thres);
- if (!scheme)
+ hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres);
+ if (!hot_scheme)
return -ENOMEM;
- damon_set_schemes(ctx, &scheme, 1);
+ if (old_hot_scheme)
+ damon_lru_sort_copy_quota_status(&hot_scheme->quota,
+ &old_hot_scheme->quota);
cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval;
- scheme = damon_lru_sort_new_cold_scheme(cold_thres);
- if (!scheme)
+ cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres);
+ if (!cold_scheme) {
+ damon_destroy_scheme(hot_scheme);
return -ENOMEM;
- damon_add_scheme(ctx, scheme);
+ }
+ if (old_cold_scheme)
+ damon_lru_sort_copy_quota_status(&cold_scheme->quota,
+ &old_cold_scheme->quota);
+
+ damon_set_schemes(ctx, &hot_scheme, 1);
+ damon_add_scheme(ctx, cold_scheme);
return damon_set_region_biggest_system_ram_default(target,
&monitor_region_start,
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index ab974e477d2f..66e190f0374a 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -150,9 +150,20 @@ static struct damos *damon_reclaim_new_scheme(void)
&damon_reclaim_wmarks);
}
+static void damon_reclaim_copy_quota_status(struct damos_quota *dst,
+ struct damos_quota *src)
+{
+ dst->total_charged_sz = src->total_charged_sz;
+ dst->total_charged_ns = src->total_charged_ns;
+ dst->charged_sz = src->charged_sz;
+ dst->charged_from = src->charged_from;
+ dst->charge_target_from = src->charge_target_from;
+ dst->charge_addr_from = src->charge_addr_from;
+}
+
static int damon_reclaim_apply_parameters(void)
{
- struct damos *scheme;
+ struct damos *scheme, *old_scheme;
struct damos_filter *filter;
int err = 0;
@@ -164,6 +175,11 @@ static int damon_reclaim_apply_parameters(void)
scheme = damon_reclaim_new_scheme();
if (!scheme)
return -ENOMEM;
+ if (!list_empty(&ctx->schemes)) {
+ damon_for_each_scheme(old_scheme, ctx)
+ damon_reclaim_copy_quota_status(&scheme->quota,
+ &old_scheme->quota);
+ }
if (skip_anon) {
filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
if (!filter) {
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index dd2fb5127009..ae0f0b314f3a 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1905,6 +1905,10 @@ void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
damon_for_each_scheme(scheme, ctx) {
struct damon_sysfs_scheme *sysfs_scheme;
+ /* user could have removed the scheme sysfs dir */
+ if (i >= sysfs_schemes->nr)
+ break;
+
sysfs_scheme = sysfs_schemes->schemes_arr[i];
damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals,
&scheme->quota);
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 5662e29fe253..65c19025da3d 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -362,6 +362,12 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
vaddr &= HPAGE_PUD_MASK;
pud = pfn_pud(args->pud_pfn, args->page_prot);
+ /*
+ * Some architectures have debug checks to make sure
+ * huge pud mappings are only found with devmap entries.
+ * For now test with only devmap entries.
+ */
+ pud = pud_mkdevmap(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud);
flush_dcache_page(page);
pudp_set_wrprotect(args->mm, vaddr, args->pudp);
@@ -374,6 +380,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(args->pud_pfn, args->page_prot);
+ pud = pud_mkdevmap(pud);
pud = pud_wrprotect(pud);
pud = pud_mkclean(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud);
@@ -391,6 +398,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
#endif /* __PAGETABLE_PMD_FOLDED */
pud = pfn_pud(args->pud_pfn, args->page_prot);
+ pud = pud_mkdevmap(pud);
pud = pud_mkyoung(pud);
set_pud_at(args->mm, vaddr, args->pudp, pud);
flush_dcache_page(page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6c8b089f00d2..fef125d9508b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2618,15 +2618,6 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
/*
- * Pairs with a barrier in
- * block_write_end()->mark_buffer_dirty() or other page
- * dirtying routines like iomap_write_end() to ensure
- * changes to page contents are visible before we see
- * increased inode size.
- */
- smp_rmb();
-
- /*
* Once we start copying data, we don't want to be touching any
* cachelines that might be contended:
*/
@@ -4120,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, folio, last_index) {
+ int order;
unsigned long nr_pages;
pgoff_t folio_first_index, folio_last_index;
+ /*
+ * Don't deref the folio. It is not pinned, and might
+ * get freed (and reused) underneath us.
+ *
+ * We *could* pin it, but that would be expensive for
+ * what should be a fast and lightweight syscall.
+ *
+ * Instead, derive all information of interest from
+ * the rcu-protected xarray.
+ */
+
if (xas_retry(&xas, folio))
continue;
+ order = xa_get_order(xas.xa, xas.xa_index);
+ nr_pages = 1 << order;
+ folio_first_index = round_down(xas.xa_index, 1 << order);
+ folio_last_index = folio_first_index + nr_pages - 1;
+
+ /* Folios might straddle the range boundaries, only count covered pages */
+ if (folio_first_index < first_index)
+ nr_pages -= first_index - folio_first_index;
+
+ if (folio_last_index > last_index)
+ nr_pages -= folio_last_index - last_index;
+
if (xa_is_value(folio)) {
/* page is evicted */
void *shadow = (void *)folio;
bool workingset; /* not used */
- int order = xa_get_order(xas.xa, xas.xa_index);
-
- nr_pages = 1 << order;
- folio_first_index = round_down(xas.xa_index, 1 << order);
- folio_last_index = folio_first_index + nr_pages - 1;
-
- /* Folios might straddle the range boundaries, only count covered pages */
- if (folio_first_index < first_index)
- nr_pages -= first_index - folio_first_index;
-
- if (folio_last_index > last_index)
- nr_pages -= folio_last_index - last_index;
cs->nr_evicted += nr_pages;
@@ -4159,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping,
goto resched;
}
- nr_pages = folio_nr_pages(folio);
- folio_first_index = folio_pgoff(folio);
- folio_last_index = folio_first_index + nr_pages - 1;
-
- /* Folios might straddle the range boundaries, only count covered pages */
- if (folio_first_index < first_index)
- nr_pages -= first_index - folio_first_index;
-
- if (folio_last_index > last_index)
- nr_pages -= folio_last_index - last_index;
-
/* page is in cache */
cs->nr_cache += nr_pages;
- if (folio_test_dirty(folio))
+ if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY))
cs->nr_dirty += nr_pages;
- if (folio_test_writeback(folio))
+ if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK))
cs->nr_writeback += nr_pages;
resched:
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 610efae91220..6ca63e8dda74 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -65,8 +65,7 @@ void kasan_save_track(struct kasan_track *track, gfp_t flags)
{
depot_stack_handle_t stack;
- stack = kasan_save_stack(flags,
- STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
+ stack = kasan_save_stack(flags, STACK_DEPOT_FLAG_CAN_ALLOC);
kasan_set_track(track, stack);
}
@@ -266,10 +265,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object,
return true;
/*
- * If the object is not put into quarantine, it will likely be quickly
- * reallocated. Thus, release its metadata now.
+ * Note: Keep per-object metadata to allow KASAN to print stack traces for
+ * use-after-free-before-realloc bugs.
*/
- kasan_release_object_meta(cache, object);
/* Let slab put the object onto the freelist. */
return false;
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index df6627f62402..6310a180278b 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -334,14 +334,6 @@ DEFINE_ASAN_SET_SHADOW(f3);
DEFINE_ASAN_SET_SHADOW(f5);
DEFINE_ASAN_SET_SHADOW(f8);
-/* Only allow cache merging when no per-object metadata is present. */
-slab_flags_t kasan_never_merge(void)
-{
- if (!kasan_requires_meta())
- return 0;
- return SLAB_KASAN;
-}
-
/*
* Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
* For larger allocations larger redzones are used.
@@ -370,15 +362,13 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
return;
/*
- * SLAB_KASAN is used to mark caches that are sanitized by KASAN
- * and that thus have per-object metadata.
- * Currently this flag is used in two places:
- * 1. In slab_ksize() to account for per-object metadata when
- * calculating the size of the accessible memory within the object.
- * 2. In slab_common.c via kasan_never_merge() to prevent merging of
- * caches with per-object metadata.
+ * SLAB_KASAN is used to mark caches that are sanitized by KASAN and
+ * that thus have per-object metadata. Currently, this flag is used in
+ * slab_ksize() to account for per-object metadata when calculating the
+ * size of the accessible memory within the object. Additionally, we use
+ * SLAB_NO_MERGE to prevent merging of caches with per-object metadata.
*/
- *flags |= SLAB_KASAN;
+ *flags |= SLAB_KASAN | SLAB_NO_MERGE;
ok_size = *size;
@@ -485,16 +475,6 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
if (alloc_meta) {
/* Zero out alloc meta to mark it as invalid. */
__memset(alloc_meta, 0, sizeof(*alloc_meta));
-
- /*
- * Prepare the lock for saving auxiliary stack traces.
- * Temporarily disable KASAN bug reporting to allow instrumented
- * raw_spin_lock_init to access aux_lock, which resides inside
- * of a redzone.
- */
- kasan_disable_current();
- raw_spin_lock_init(&alloc_meta->aux_lock);
- kasan_enable_current();
}
/*
@@ -506,47 +486,23 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
static void release_alloc_meta(struct kasan_alloc_meta *meta)
{
- /* Evict the stack traces from stack depot. */
- stack_depot_put(meta->alloc_track.stack);
- stack_depot_put(meta->aux_stack[0]);
- stack_depot_put(meta->aux_stack[1]);
-
- /*
- * Zero out alloc meta to mark it as invalid but keep aux_lock
- * initialized to avoid having to reinitialize it when another object
- * is allocated in the same slot.
- */
- __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track));
- __memset(meta->aux_stack, 0, sizeof(meta->aux_stack));
+ /* Zero out alloc meta to mark it as invalid. */
+ __memset(meta, 0, sizeof(*meta));
}
static void release_free_meta(const void *object, struct kasan_free_meta *meta)
{
+ if (!kasan_arch_is_ready())
+ return;
+
/* Check if free meta is valid. */
if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META)
return;
- /* Evict the stack trace from the stack depot. */
- stack_depot_put(meta->free_track.stack);
-
/* Mark free meta as invalid. */
*(u8 *)kasan_mem_to_shadow(object) = KASAN_SLAB_FREE;
}
-void kasan_release_object_meta(struct kmem_cache *cache, const void *object)
-{
- struct kasan_alloc_meta *alloc_meta;
- struct kasan_free_meta *free_meta;
-
- alloc_meta = kasan_get_alloc_meta(cache, object);
- if (alloc_meta)
- release_alloc_meta(alloc_meta);
-
- free_meta = kasan_get_free_meta(cache, object);
- if (free_meta)
- release_free_meta(object, free_meta);
-}
-
size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
{
struct kasan_cache *info = &cache->kasan_info;
@@ -571,8 +527,6 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
- depot_stack_handle_t new_handle, old_handle;
- unsigned long flags;
if (is_kfence_address(addr) || !slab)
return;
@@ -583,33 +537,18 @@ static void __kasan_record_aux_stack(void *addr, depot_flags_t depot_flags)
if (!alloc_meta)
return;
- new_handle = kasan_save_stack(0, depot_flags);
-
- /*
- * Temporarily disable KASAN bug reporting to allow instrumented
- * spinlock functions to access aux_lock, which resides inside of a
- * redzone.
- */
- kasan_disable_current();
- raw_spin_lock_irqsave(&alloc_meta->aux_lock, flags);
- old_handle = alloc_meta->aux_stack[1];
alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
- alloc_meta->aux_stack[0] = new_handle;
- raw_spin_unlock_irqrestore(&alloc_meta->aux_lock, flags);
- kasan_enable_current();
-
- stack_depot_put(old_handle);
+ alloc_meta->aux_stack[0] = kasan_save_stack(0, depot_flags);
}
void kasan_record_aux_stack(void *addr)
{
- return __kasan_record_aux_stack(addr,
- STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
+ return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_CAN_ALLOC);
}
void kasan_record_aux_stack_noalloc(void *addr)
{
- return __kasan_record_aux_stack(addr, STACK_DEPOT_FLAG_GET);
+ return __kasan_record_aux_stack(addr, 0);
}
void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
@@ -620,7 +559,7 @@ void kasan_save_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
if (!alloc_meta)
return;
- /* Evict previous stack traces (might exist for krealloc or mempool). */
+ /* Invalidate previous stack traces (might exist for krealloc or mempool). */
release_alloc_meta(alloc_meta);
kasan_save_track(&alloc_meta->alloc_track, flags);
@@ -634,7 +573,7 @@ void kasan_save_free_info(struct kmem_cache *cache, void *object)
if (!free_meta)
return;
- /* Evict previous stack trace (might exist for mempool). */
+ /* Invalidate previous stack trace (might exist for mempool). */
release_free_meta(object, free_meta);
kasan_save_track(&free_meta->free_track, 0);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index d0f172f2b978..fb2b9ac0659a 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -6,7 +6,6 @@
#include <linux/kasan.h>
#include <linux/kasan-tags.h>
#include <linux/kfence.h>
-#include <linux/spinlock.h>
#include <linux/stackdepot.h>
#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
@@ -265,13 +264,6 @@ struct kasan_global {
struct kasan_alloc_meta {
struct kasan_track alloc_track;
/* Free track is stored in kasan_free_meta. */
- /*
- * aux_lock protects aux_stack from accesses from concurrent
- * kasan_record_aux_stack calls. It is a raw spinlock to avoid sleeping
- * on RT kernels, as kasan_record_aux_stack_noalloc can be called from
- * non-sleepable contexts.
- */
- raw_spinlock_t aux_lock;
depot_stack_handle_t aux_stack[2];
};
@@ -398,10 +390,8 @@ struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
const void *object);
void kasan_init_object_meta(struct kmem_cache *cache, const void *object);
-void kasan_release_object_meta(struct kmem_cache *cache, const void *object);
#else
static inline void kasan_init_object_meta(struct kmem_cache *cache, const void *object) { }
-static inline void kasan_release_object_meta(struct kmem_cache *cache, const void *object) { }
#endif
depot_stack_handle_t kasan_save_stack(gfp_t flags, depot_flags_t depot_flags);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 3ba02efb952a..6958aa713c67 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -145,7 +145,10 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
void *object = qlink_to_object(qlink, cache);
struct kasan_free_meta *free_meta = kasan_get_free_meta(cache, object);
- kasan_release_object_meta(cache, object);
+ /*
+ * Note: Keep per-object metadata to allow KASAN to print stack traces for
+ * use-after-free-before-realloc bugs.
+ */
/*
* If init_on_free is enabled and KASAN's free metadata is stored in
diff --git a/mm/memblock.c b/mm/memblock.c
index 4dcb2ee35eca..d09136e040d3 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
/*
* Address comparison utilities
*/
-static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
- phys_addr_t base2, phys_addr_t size2)
+unsigned long __init_memblock
+memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
+ phys_addr_t size2)
{
return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}
@@ -2249,6 +2250,7 @@ static const char * const flagname[] = {
[ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
[ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
[ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
+ [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
};
static int memblock_debug_show(struct seq_file *m, void *private)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1ed40f9d3a27..61932c9215e7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7971,9 +7971,13 @@ bool mem_cgroup_swap_full(struct folio *folio)
static int __init setup_swap_account(char *s)
{
- pr_warn_once("The swapaccount= commandline option is deprecated. "
- "Please report your usecase to linux-mm@kvack.org if you "
- "depend on this functionality.\n");
+ bool res;
+
+ if (!kstrtobool(s, &res) && !res)
+ pr_warn_once("The swapaccount=0 commandline option is deprecated "
+ "in favor of configuring swap control via cgroupfs. "
+ "Please report your usecase to linux-mm@kvack.org if you "
+ "depend on this functionality.\n");
return 1;
}
__setup("swapaccount=", setup_swap_account);
diff --git a/mm/memory.c b/mm/memory.c
index 89bcae0b224d..0bfc8b007c01 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3799,6 +3799,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
struct page *page;
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
+ bool need_clear_cache = false;
bool exclusive = false;
swp_entry_t entry;
pte_t pte;
@@ -3867,6 +3868,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
__swap_count(entry) == 1) {
+ /*
+ * Prevent parallel swapin from proceeding with
+ * the cache flag. Otherwise, another thread may
+ * finish swapin first, free the entry, and swapout
+ * reusing the same entry. It's undetectable as
+ * pte_same() returns true due to entry reuse.
+ */
+ if (swapcache_prepare(entry)) {
+ /* Relax a bit to prevent rapid repeated page faults */
+ schedule_timeout_uninterruptible(1);
+ goto out;
+ }
+ need_clear_cache = true;
+
/* skip swapcache */
folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
vma, vmf->address, false);
@@ -4117,6 +4132,9 @@ unlock:
if (vmf->pte)
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
+ /* Clear the swap cache pin for direct swapin after PTL unlock */
+ if (need_clear_cache)
+ swapcache_clear(si, entry);
if (si)
put_swap_device(si);
return ret;
@@ -4131,6 +4149,8 @@ out_release:
folio_unlock(swapcache);
folio_put(swapcache);
}
+ if (need_clear_cache)
+ swapcache_clear(si, entry);
if (si)
put_swap_device(si);
return ret;
@@ -5478,7 +5498,7 @@ static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs
return true;
if (regs && !user_mode(regs)) {
- unsigned long ip = instruction_pointer(regs);
+ unsigned long ip = exception_ip(regs);
if (!search_exception_tables(ip))
return false;
}
@@ -5503,7 +5523,7 @@ static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_r
{
mmap_read_unlock(mm);
if (regs && !user_mode(regs)) {
- unsigned long ip = instruction_pointer(regs);
+ unsigned long ip = exception_ip(regs);
if (!search_exception_tables(ip))
return false;
}
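Note: exception_ip() is likewise defined outside mm/, in the ptrace headers. The expected generic fallback is simply the instruction pointer; architectures such as MIPS, where the faulting instruction may sit in a branch delay slot, override it to report the instruction that actually raised the exception:

/* Assumed generic fallback; not part of this mm/ diff. */
#ifndef exception_ip
#define exception_ip(regs)	instruction_pointer(regs)
#endif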
diff --git a/mm/migrate.c b/mm/migrate.c
index cc9f2bcd73b4..c27b1f8097d4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2519,6 +2519,14 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
if (managed_zone(pgdat->node_zones + z))
break;
}
+
+ /*
+ * If there are no managed zones, it should not proceed
+ * further.
+ */
+ if (z < 0)
+ return 0;
+
wakeup_kswapd(pgdat->node_zones + z, 0,
folio_order(folio), ZONE_MOVABLE);
return 0;
diff --git a/mm/mmap.c b/mm/mmap.c
index d89770eaab6b..3281287771c9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -954,13 +954,21 @@ static struct vm_area_struct
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
+ /*
+ * can_vma_merge_after() assumed we would not be
+ * removing prev vma, so it skipped the check
+ * for vm_ops->close, but we are removing curr
+ */
+ if (curr->vm_ops && curr->vm_ops->close)
+ err = -EINVAL;
remove = curr;
} else { /* case 5 */
adjust = curr;
adj_start = (end - curr->vm_start);
}
+ if (!err)
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else { /* merge_next */
vma_start_write(next);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 150d4f23b010..62fc2e8f2733 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4041,6 +4041,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+ bool can_compact = gfp_compaction_allowed(gfp_mask);
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
@@ -4111,7 +4112,7 @@ restart:
* Don't try this for allocations that are allowed to ignore
* watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
- if (can_direct_reclaim &&
+ if (can_direct_reclaim && can_compact &&
(costly_order ||
(order > 0 && ac->migratetype != MIGRATE_MOVABLE))
&& !gfp_pfmemalloc_allowed(gfp_mask)) {
@@ -4209,9 +4210,10 @@ retry:
/*
* Do not retry costly high order allocations unless they are
- * __GFP_RETRY_MAYFAIL
+ * __GFP_RETRY_MAYFAIL and we can compact
*/
- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+ if (costly_order && (!can_compact ||
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -4224,7 +4226,7 @@ retry:
* implementation of the compaction depends on the sufficient amount
* of free memory (see __compaction_suitable)
*/
- if (did_some_progress > 0 &&
+ if (did_some_progress > 0 && can_compact &&
should_compact_retry(ac, order, alloc_flags,
compact_result, &compact_priority,
&compaction_retries))
@@ -4685,8 +4687,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
- __GFP_NOMEMALLOC;
+ gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
+ __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
@@ -4699,6 +4701,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
return page;
}
+void page_frag_cache_drain(struct page_frag_cache *nc)
+{
+ if (!nc->va)
+ return;
+
+ __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
+ nc->va = NULL;
+}
+EXPORT_SYMBOL(page_frag_cache_drain);
+
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
@@ -4708,9 +4720,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
}
EXPORT_SYMBOL(__page_frag_cache_drain);
-void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
@@ -4779,7 +4791,7 @@ refill:
return nc->va + offset;
}
-EXPORT_SYMBOL(page_frag_alloc_align);
+EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
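Note: the newly exported page_frag_cache_drain() saves drivers from open-coding the virt_to_head_page()/__page_frag_cache_drain() pair on teardown. A minimal, hypothetical caller (struct and function names invented for illustration) might look like:

struct rx_queue {
	struct page_frag_cache frag_cache;
	/* ... */
};

static void rx_queue_destroy(struct rx_queue *q)
{
	/* Drops the reference held on the cached page, if any, and resets
	 * the cache so it can be reused or freed. */
	page_frag_cache_drain(&q->frag_cache);
}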
diff --git a/mm/shmem.c b/mm/shmem.c
index cf3689926418..be890844fc92 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3407,7 +3407,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
- if (!simple_empty(dentry))
+ if (!simple_offset_empty(dentry))
return -ENOTEMPTY;
drop_nlink(d_inode(dentry));
@@ -3464,7 +3464,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return simple_offset_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
- if (!simple_empty(new_dentry))
+ if (!simple_offset_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
@@ -4388,7 +4388,9 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#ifdef CONFIG_TMPFS_POSIX_ACL
sb->s_flags |= SB_POSIXACL;
#endif
- uuid_gen(&sb->s_uuid);
+ uuid_t uuid;
+ uuid_gen(&uuid);
+ super_set_uuid(sb, uuid.b, sizeof(uuid));
#ifdef CONFIG_TMPFS_QUOTA
if (ctx->seen & SHMEM_SEEN_QUOTA) {
diff --git a/mm/slab.h b/mm/slab.h
index 54deeb0428c6..d2bc9b191222 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -363,7 +363,6 @@ static inline int objs_per_slab(const struct kmem_cache *cache,
enum slab_state {
DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */
- PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */
};
@@ -387,7 +386,7 @@ extern const struct kmalloc_info_struct {
/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
-void create_kmalloc_caches(slab_flags_t);
+void create_kmalloc_caches(void);
extern u8 kmalloc_size_index[24];
@@ -422,8 +421,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags);
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
void __init kmem_cache_init(void);
-void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
- slab_flags_t flags);
extern void create_boot_cache(struct kmem_cache *, const char *name,
unsigned int size, slab_flags_t flags,
unsigned int useroffset, unsigned int usersize);
@@ -435,8 +432,7 @@ struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *));
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name);
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);
static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
@@ -469,7 +465,6 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_STORE_USER | \
SLAB_TRACE | \
SLAB_CONSISTENCY_CHECKS | \
- SLAB_MEM_SPREAD | \
SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
@@ -528,7 +523,7 @@ static inline bool __slub_debug_enabled(void)
#endif
/*
- * Returns true if any of the specified slub_debug flags is enabled for the
+ * Returns true if any of the specified slab_debug flags is enabled for the
* cache. Use only for flags parsed by setup_slub_debug() as it also enables
* the static key.
*/
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 238293b1dbe1..23af762148ca 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -50,7 +50,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB | SLAB_NO_MERGE | kasan_never_merge())
+ SLAB_FAILSLAB | SLAB_NO_MERGE)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -172,7 +172,7 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
- flags = kmem_cache_flags(size, flags, name);
+ flags = kmem_cache_flags(flags, name);
if (flags & SLAB_NEVER_MERGE)
return NULL;
@@ -282,7 +282,7 @@ kmem_cache_create_usercopy(const char *name,
#ifdef CONFIG_SLUB_DEBUG
/*
- * If no slub_debug was enabled globally, the static key is not yet
+ * If no slab_debug was enabled globally, the static key is not yet
* enabled by setup_slub_debug(). Enable it if the cache is being
* created with any of the debugging flags passed explicitly.
* It's also possible that this is the first cache created with
@@ -404,8 +404,12 @@ EXPORT_SYMBOL(kmem_cache_create);
*/
static void kmem_cache_release(struct kmem_cache *s)
{
- sysfs_slab_unlink(s);
- sysfs_slab_release(s);
+ if (slab_state >= FULL) {
+ sysfs_slab_unlink(s);
+ sysfs_slab_release(s);
+ } else {
+ slab_kmem_cache_release(s);
+ }
}
#else
static void kmem_cache_release(struct kmem_cache *s)
@@ -766,7 +770,7 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
}
/*
- * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
+ * kmalloc_info[] is to make slab_debug=,kmalloc-xx option work at boot time.
* kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is
* kmalloc-2M.
*/
@@ -853,9 +857,10 @@ static unsigned int __kmalloc_minalign(void)
return max(minalign, arch_slab_minalign());
}
-void __init
-new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
+static void __init
+new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
{
+ slab_flags_t flags = 0;
unsigned int minalign = __kmalloc_minalign();
unsigned int aligned_size = kmalloc_info[idx].size;
int aligned_idx = idx;
@@ -902,7 +907,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
* may already have been created because they were needed to
* enable allocations for slab creation.
*/
-void __init create_kmalloc_caches(slab_flags_t flags)
+void __init create_kmalloc_caches(void)
{
int i;
enum kmalloc_cache_type type;
@@ -913,7 +918,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) {
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
if (!kmalloc_caches[type][i])
- new_kmalloc_cache(i, type, flags);
+ new_kmalloc_cache(i, type);
/*
* Caches that are not of the two-to-the-power-of size.
@@ -922,10 +927,10 @@ void __init create_kmalloc_caches(slab_flags_t flags)
*/
if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
!kmalloc_caches[type][1])
- new_kmalloc_cache(1, type, flags);
+ new_kmalloc_cache(1, type);
if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
!kmalloc_caches[type][2])
- new_kmalloc_cache(2, type, flags);
+ new_kmalloc_cache(2, type);
}
}
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
diff --git a/mm/slub.c b/mm/slub.c
index 2ef88bbf56a3..1bb2a93cf7b6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -295,7 +295,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/*
* Debugging flags that require metadata to be stored in the slab. These get
- * disabled when slub_debug=O is used and a cache's min order increases with
+ * disabled when slab_debug=O is used and a cache's min order increases with
* metadata.
*/
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -306,13 +306,13 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/* Internal SLUB flags */
/* Poison object */
-#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
+#define __OBJECT_POISON __SLAB_FLAG_BIT(_SLAB_OBJECT_POISON)
/* Use cmpxchg_double */
#ifdef system_has_freelist_aba
-#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
+#define __CMPXCHG_DOUBLE __SLAB_FLAG_BIT(_SLAB_CMPXCHG_DOUBLE)
#else
-#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0U)
+#define __CMPXCHG_DOUBLE __SLAB_FLAG_UNUSED
#endif
/*
@@ -391,7 +391,7 @@ struct kmem_cache_cpu {
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
- struct slab *partial; /* Partially allocated frozen slabs */
+ struct slab *partial; /* Partially allocated slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
@@ -1498,16 +1498,8 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
{
struct kmem_cache_node *n = get_node(s, node);
- /*
- * May be called early in order to allocate a slab for the
- * kmem_cache_node structure. Solve the chicken-egg
- * dilemma by deferring the increment of the count during
- * bootstrap (see early_kmem_cache_node_alloc).
- */
- if (likely(n)) {
- atomic_long_inc(&n->nr_slabs);
- atomic_long_add(objects, &n->total_objects);
- }
+ atomic_long_inc(&n->nr_slabs);
+ atomic_long_add(objects, &n->total_objects);
}
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
@@ -1616,7 +1608,7 @@ static inline int free_consistency_checks(struct kmem_cache *s,
}
/*
- * Parse a block of slub_debug options. Blocks are delimited by ';'
+ * Parse a block of slab_debug options. Blocks are delimited by ';'
*
* @str: start of block
* @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
@@ -1677,7 +1669,7 @@ parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
break;
default:
if (init)
- pr_err("slub_debug option '%c' unknown. skipped\n", *str);
+ pr_err("slab_debug option '%c' unknown. skipped\n", *str);
}
}
check_slabs:
@@ -1736,7 +1728,7 @@ static int __init setup_slub_debug(char *str)
/*
* For backwards compatibility, a single list of flags with list of
* slabs means debugging is only changed for those slabs, so the global
- * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
+ * slab_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
* on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as
* long as there is no option specifying flags without a slab list.
*/
@@ -1760,21 +1752,20 @@ out:
return 1;
}
-__setup("slub_debug", setup_slub_debug);
+__setup("slab_debug", setup_slub_debug);
+__setup_param("slub_debug", slub_debug, setup_slub_debug, 0);
/*
* kmem_cache_flags - apply debugging options to the cache
- * @object_size: the size of an object without meta data
* @flags: flags to set
* @name: name of the cache
*
* Debug option(s) are applied to @flags. In addition to the debug
* option(s), if a slab name (or multiple) is specified i.e.
- * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
+ * slab_debug=<Debug-Options>,<slab name1>,<slab name2> ...
* then only the select slabs will receive the debug option(s).
*/
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name)
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
char *iter;
size_t len;
@@ -1850,8 +1841,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name)
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
return flags;
}
@@ -2038,11 +2028,6 @@ void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
}
#else /* CONFIG_MEMCG_KMEM */
-static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
-{
- return NULL;
-}
-
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
@@ -2243,7 +2228,7 @@ static void __init init_freelist_randomization(void)
}
/* Get the next entry on the pre-computed freelist randomized */
-static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
+static void *next_freelist_entry(struct kmem_cache *s,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
@@ -2282,13 +2267,12 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
start = fixup_red_left(s, slab_address(slab));
/* First entry is used as the base of the freelist */
- cur = next_freelist_entry(s, slab, &pos, start, page_limit,
- freelist_count);
+ cur = next_freelist_entry(s, &pos, start, page_limit, freelist_count);
cur = setup_object(s, cur);
slab->freelist = cur;
for (idx = 1; idx < slab->objects; idx++) {
- next = next_freelist_entry(s, slab, &pos, start, page_limit,
+ next = next_freelist_entry(s, &pos, start, page_limit,
freelist_count);
next = setup_object(s, next);
set_freepointer(s, cur, next);
@@ -3263,7 +3247,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
oo_order(s->min));
if (oo_order(s->min) > get_order(s->object_size))
- pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
+ pr_warn(" %s debugging increased min order, use slab_debug=O to disable.\n",
s->name);
for_each_kmem_cache_node(s, node, n) {
@@ -3326,7 +3310,6 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
counters = slab->counters;
new.counters = counters;
- VM_BUG_ON(!new.frozen);
new.inuse = slab->objects;
new.frozen = freelist != NULL;
@@ -3498,18 +3481,20 @@ new_slab:
slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
- local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- stat(s, CPU_PARTIAL_ALLOC);
- if (unlikely(!node_match(slab, node) ||
- !pfmemalloc_match(slab, gfpflags))) {
- slab->next = NULL;
- __put_partials(s, slab);
- continue;
+ if (likely(node_match(slab, node) &&
+ pfmemalloc_match(slab, gfpflags))) {
+ c->slab = slab;
+ freelist = get_freelist(s, slab);
+ VM_BUG_ON(!freelist);
+ stat(s, CPU_PARTIAL_ALLOC);
+ goto load_freelist;
}
- freelist = freeze_slab(s, slab);
- goto retry_load_slab;
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+ slab->next = NULL;
+ __put_partials(s, slab);
}
#endif
@@ -3792,11 +3777,11 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
zero_size = orig_size;
/*
- * When slub_debug is enabled, avoid memory initialization integrated
+ * When slab_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
- * penalty on production builds, as slub_debug is not intended to be
+ * penalty on production builds, as slab_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
@@ -4187,7 +4172,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* then add it.
*/
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
- remove_full(s, n, slab);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
@@ -4201,9 +4185,6 @@ slab_empty:
*/
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
- } else {
- /* Slab must be on the full list */
- remove_full(s, n, slab);
}
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -4702,8 +4683,8 @@ static unsigned int slub_min_objects;
* activity on the partial lists which requires taking the list_lock. This is
* less a concern for large slabs though which are rarely used.
*
- * slub_max_order specifies the order where we begin to stop considering the
- * number of objects in a slab as critical. If we reach slub_max_order then
+ * slab_max_order specifies the order where we begin to stop considering the
+ * number of objects in a slab as critical. If we reach slab_max_order then
* we try to keep the page order as low as possible. So we accept more waste
* of space in favor of a small page order.
*
@@ -4770,14 +4751,14 @@ static inline int calculate_order(unsigned int size)
* and backing off gradually.
*
* We start with accepting at most 1/16 waste and try to find the
- * smallest order from min_objects-derived/slub_min_order up to
- * slub_max_order that will satisfy the constraint. Note that increasing
+ * smallest order from min_objects-derived/slab_min_order up to
+ * slab_max_order that will satisfy the constraint. Note that increasing
* the order can only result in same or less fractional waste, not more.
*
* If that fails, we increase the acceptable fraction of waste and try
* again. The last iteration with fraction of 1/2 would effectively
* accept any waste and give us the order determined by min_objects, as
- * long as at least single object fits within slub_max_order.
+ * long as at least single object fits within slab_max_order.
*/
for (unsigned int fraction = 16; fraction > 1; fraction /= 2) {
order = calc_slab_order(size, min_order, slub_max_order,
@@ -4787,7 +4768,7 @@ static inline int calculate_order(unsigned int size)
}
/*
- * Doh this slab cannot be placed using slub_max_order.
+ * Doh this slab cannot be placed using slab_max_order.
*/
order = get_order(size);
if (order <= MAX_PAGE_ORDER)
@@ -4857,7 +4838,6 @@ static void early_kmem_cache_node_alloc(int node)
slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
BUG_ON(!slab);
- inc_slabs_node(kmem_cache_node, slab_nid(slab), slab->objects);
if (slab_nid(slab) != node) {
pr_err("SLUB: Unable to allocate memory from node %d\n", node);
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
@@ -5104,7 +5084,7 @@ static int calculate_sizes(struct kmem_cache *s)
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
- s->flags = kmem_cache_flags(s->size, flags, s->name);
+ s->flags = kmem_cache_flags(flags, s->name);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
s->random = get_random_long();
#endif
@@ -5313,7 +5293,9 @@ static int __init setup_slub_min_order(char *str)
return 1;
}
-__setup("slub_min_order=", setup_slub_min_order);
+__setup("slab_min_order=", setup_slub_min_order);
+__setup_param("slub_min_order=", slub_min_order, setup_slub_min_order, 0);
+
static int __init setup_slub_max_order(char *str)
{
@@ -5326,7 +5308,8 @@ static int __init setup_slub_max_order(char *str)
return 1;
}
-__setup("slub_max_order=", setup_slub_max_order);
+__setup("slab_max_order=", setup_slub_max_order);
+__setup_param("slub_max_order=", slub_max_order, setup_slub_max_order, 0);
static int __init setup_slub_min_objects(char *str)
{
@@ -5335,7 +5318,8 @@ static int __init setup_slub_min_objects(char *str)
return 1;
}
-__setup("slub_min_objects=", setup_slub_min_objects);
+__setup("slab_min_objects=", setup_slub_min_objects);
+__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
#ifdef CONFIG_HARDENED_USERCOPY
/*
@@ -5663,7 +5647,7 @@ void __init kmem_cache_init(void)
/* Now we can use the kmem_cache to allocate kmalloc slabs */
setup_kmalloc_cache_index_table();
- create_kmalloc_caches(0);
+ create_kmalloc_caches();
/* Setup random freelists for each cache */
init_freelist_randomization();
@@ -6792,14 +6776,12 @@ out_del_kobj:
void sysfs_slab_unlink(struct kmem_cache *s)
{
- if (slab_state >= FULL)
- kobject_del(&s->kobj);
+ kobject_del(&s->kobj);
}
void sysfs_slab_release(struct kmem_cache *s)
{
- if (slab_state >= FULL)
- kobject_put(&s->kobj);
+ kobject_put(&s->kobj);
}
/*
diff --git a/mm/swap.h b/mm/swap.h
index 758c46ca671e..fc2f6ade7f80 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -41,6 +41,7 @@ void __delete_from_swap_cache(struct folio *folio,
void delete_from_swap_cache(struct folio *folio);
void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end);
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry);
struct folio *swap_cache_get_folio(swp_entry_t entry,
struct vm_area_struct *vma, unsigned long addr);
struct folio *filemap_get_incore_folio(struct address_space *mapping,
@@ -97,6 +98,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
return 0;
}
+static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
+{
+}
+
static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
struct vm_area_struct *vma, unsigned long addr)
{
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e671266ad772..7255c01a1e4e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -680,9 +680,10 @@ skip:
/* The page was likely read above, so no need for plugging here */
folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
&page_allocated, false);
- if (unlikely(page_allocated))
+ if (unlikely(page_allocated)) {
+ zswap_folio_swapin(folio);
swap_read_folio(folio, false, NULL);
- zswap_folio_swapin(folio);
+ }
return folio;
}
@@ -855,9 +856,10 @@ skip:
/* The folio was likely read above, so no need for plugging here */
folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx,
&page_allocated, false);
- if (unlikely(page_allocated))
+ if (unlikely(page_allocated)) {
+ zswap_folio_swapin(folio);
swap_read_folio(folio, false, NULL);
- zswap_folio_swapin(folio);
+ }
return folio;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 556ff7347d5f..573843d9cc91 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2532,10 +2532,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
exit_swap_address_space(p->type);
inode = mapping->host;
- if (p->bdev_handle) {
+ if (p->bdev_file) {
set_blocksize(p->bdev, old_block_size);
- bdev_release(p->bdev_handle);
- p->bdev_handle = NULL;
+ fput(p->bdev_file);
+ p->bdev_file = NULL;
}
inode_lock(inode);
@@ -2765,14 +2765,14 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
int error;
if (S_ISBLK(inode->i_mode)) {
- p->bdev_handle = bdev_open_by_dev(inode->i_rdev,
+ p->bdev_file = bdev_file_open_by_dev(inode->i_rdev,
BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
- if (IS_ERR(p->bdev_handle)) {
- error = PTR_ERR(p->bdev_handle);
- p->bdev_handle = NULL;
+ if (IS_ERR(p->bdev_file)) {
+ error = PTR_ERR(p->bdev_file);
+ p->bdev_file = NULL;
return error;
}
- p->bdev = p->bdev_handle->bdev;
+ p->bdev = file_bdev(p->bdev_file);
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
@@ -3208,10 +3208,10 @@ bad_swap:
p->percpu_cluster = NULL;
free_percpu(p->cluster_next_cpu);
p->cluster_next_cpu = NULL;
- if (p->bdev_handle) {
+ if (p->bdev_file) {
set_blocksize(p->bdev, p->old_block_size);
- bdev_release(p->bdev_handle);
- p->bdev_handle = NULL;
+ fput(p->bdev_file);
+ p->bdev_file = NULL;
}
inode = NULL;
destroy_swap_extents(p);
@@ -3365,6 +3365,19 @@ int swapcache_prepare(swp_entry_t entry)
return __swap_duplicate(entry, SWAP_HAS_CACHE);
}
+void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry)
+{
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+ unsigned char usage;
+
+ ci = lock_cluster_or_swap_info(si, offset);
+ usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE);
+ unlock_cluster_or_swap_info(si, ci);
+ if (!usage)
+ free_swap_slot(entry);
+}
+
struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
return swap_type_to_swap_info(swp_type(entry));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7cf7d4384259..313f1c42768a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -914,9 +914,6 @@ static int move_present_pte(struct mm_struct *mm,
goto out;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
orig_src_pte = ptep_clear_flush(src_vma, src_addr, src_pte);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -925,6 +922,9 @@ static int move_present_pte(struct mm_struct *mm,
goto out;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
diff --git a/mm/util.c b/mm/util.c
index 5a6a9802583b..5faf3adc6f43 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -136,6 +136,23 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
EXPORT_SYMBOL(kmemdup);
/**
+ * kmemdup_array - duplicate a given array.
+ *
+ * @src: array to duplicate.
+ * @element_size: size of each element of array.
+ * @count: number of elements to duplicate from array.
+ * @gfp: GFP mask to use.
+ *
+ * Return: duplicated array of @src or %NULL in case of error,
+ * result is physically contiguous. Use kfree() to free.
+ */
+void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+{
+ return kmemdup(src, size_mul(element_size, count), gfp);
+}
+EXPORT_SYMBOL(kmemdup_array);
+
+/**
* kvmemdup - duplicate region of memory
*
* @src: memory region to duplicate
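Note: kmemdup_array() is a thin wrapper that adds overflow protection on top of kmemdup(); size_mul() saturates, so a bogus element count cannot silently wrap the allocation size. A hypothetical caller (identifiers invented here) would use it as:

u32 *copy = kmemdup_array(regs, sizeof(*regs), nregs, GFP_KERNEL);
if (!copy)
	return -ENOMEM;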
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d12a17fc0c17..1e36322d83d8 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -304,8 +304,8 @@ static int vmap_range_noflush(unsigned long addr, unsigned long end,
return err;
}
-int ioremap_page_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
int err;
@@ -318,6 +318,26 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
return err;
}
+int ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
+{
+ struct vm_struct *area;
+
+ area = find_vm_area((void *)addr);
+ if (!area || !(area->flags & VM_IOREMAP)) {
+ WARN_ONCE(1, "vm_area at addr %lx is not marked as VM_IOREMAP\n", addr);
+ return -EINVAL;
+ }
+ if (addr != (unsigned long)area->addr ||
+ (void *)end != area->addr + get_vm_area_size(area)) {
+ WARN_ONCE(1, "ioremap request [%lx,%lx) doesn't match vm_area [%lx, %lx)\n",
+ addr, end, (long)area->addr,
+ (long)area->addr + get_vm_area_size(area));
+ return -ERANGE;
+ }
+ return vmap_page_range(addr, end, phys_addr, prot);
+}
+
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
{
@@ -635,6 +655,58 @@ static int vmap_pages_range(unsigned long addr, unsigned long end,
return err;
}
+static int check_sparse_vm_area(struct vm_struct *area, unsigned long start,
+ unsigned long end)
+{
+ might_sleep();
+ if (WARN_ON_ONCE(area->flags & VM_FLUSH_RESET_PERMS))
+ return -EINVAL;
+ if (WARN_ON_ONCE(area->flags & VM_NO_GUARD))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!(area->flags & VM_SPARSE)))
+ return -EINVAL;
+ if ((end - start) >> PAGE_SHIFT > totalram_pages())
+ return -E2BIG;
+ if (start < (unsigned long)area->addr ||
+ (void *)end > area->addr + get_vm_area_size(area))
+ return -ERANGE;
+ return 0;
+}
+
+/**
+ * vm_area_map_pages - map pages inside given sparse vm_area
+ * @area: vm_area
+ * @start: start address inside vm_area
+ * @end: end address inside vm_area
+ * @pages: pages to map (always PAGE_SIZE pages)
+ */
+int vm_area_map_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end, struct page **pages)
+{
+ int err;
+
+ err = check_sparse_vm_area(area, start, end);
+ if (err)
+ return err;
+
+ return vmap_pages_range(start, end, PAGE_KERNEL, pages, PAGE_SHIFT);
+}
+
+/**
+ * vm_area_unmap_pages - unmap pages inside given sparse vm_area
+ * @area: vm_area
+ * @start: start address inside vm_area
+ * @end: end address inside vm_area
+ */
+void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end)
+{
+ if (check_sparse_vm_area(area, start, end))
+ return;
+
+ vunmap_range(start, end);
+}
+
int is_vmalloc_or_module_addr(const void *x)
{
/*
@@ -3809,9 +3881,9 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
if (flags & VMAP_RAM)
copied = vmap_ram_vread_iter(iter, addr, n, flags);
- else if (!(vm && (vm->flags & VM_IOREMAP)))
+ else if (!(vm && (vm->flags & (VM_IOREMAP | VM_SPARSE))))
copied = aligned_vread_iter(iter, addr, n);
- else /* IOREMAP area is treated as memory hole */
+ else /* IOREMAP | SPARSE area is treated as memory hole */
copied = zero_iter(iter, n);
addr += copied;
@@ -4402,6 +4474,9 @@ static int s_show(struct seq_file *m, void *p)
if (v->flags & VM_IOREMAP)
seq_puts(m, " ioremap");
+ if (v->flags & VM_SPARSE)
+ seq_puts(m, " sparse");
+
if (v->flags & VM_ALLOC)
seq_puts(m, " vmalloc");
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854ce6cc..4255619a1a31 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5753,7 +5753,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
/* Use reclaim/compaction for costly allocs or under memory pressure */
static bool in_reclaim_compaction(struct scan_control *sc)
{
- if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
+ if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
sc->priority < DEF_PRIORITY - 2))
return true;
@@ -5998,6 +5998,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
unsigned long watermark;
+ if (!gfp_compaction_allowed(sc->gfp_mask))
+ return false;
+
/* Allocation can already succeed, nothing to do */
if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
sc->reclaim_idx, 0))
diff --git a/mm/zswap.c b/mm/zswap.c
index 350dd2fc8159..db4625af65fb 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -377,10 +377,9 @@ void zswap_folio_swapin(struct folio *folio)
{
struct lruvec *lruvec;
- if (folio) {
- lruvec = folio_lruvec(folio);
- atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
- }
+ VM_WARN_ON_ONCE(!folio_test_locked(folio));
+ lruvec = folio_lruvec(folio);
+ atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
}
/*********************************
@@ -1440,6 +1439,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
spin_unlock(&tree->lock);
delete_from_swap_cache(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return -ENOMEM;
}
spin_unlock(&tree->lock);
@@ -1517,7 +1518,7 @@ bool zswap_store(struct folio *folio)
if (folio_test_large(folio))
return false;
- if (!zswap_enabled || !tree)
+ if (!tree)
return false;
/*
@@ -1532,6 +1533,10 @@ bool zswap_store(struct folio *folio)
zswap_invalidate_entry(tree, dupentry);
}
spin_unlock(&tree->lock);
+
+ if (!zswap_enabled)
+ return false;
+
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);