Diffstat (limited to 'mm')
-rw-r--r--   mm/damon/sysfs.c          7
-rw-r--r--   mm/damon/vaddr-test.h     2
-rw-r--r--   mm/damon/vaddr.c          3
-rw-r--r--   mm/filemap.c             73
-rw-r--r--   mm/hugetlb.c            125
-rw-r--r--   mm/kasan/kasan.h          9
-rw-r--r--   mm/kasan/report.c         2
-rw-r--r--   mm/memcontrol.c          19
-rw-r--r--   mm/memory.c              13
-rw-r--r--   mm/mempolicy.c           61
-rw-r--r--   mm/migrate.c             21
-rw-r--r--   mm/mmap.c                52
-rw-r--r--   mm/mremap.c               2
-rw-r--r--   mm/nommu.c                5
-rw-r--r--   mm/page_alloc.c          14
-rw-r--r--   mm/percpu.c              35
-rw-r--r--   mm/readahead.c            3
-rw-r--r--   mm/rmap.c                23
-rw-r--r--   mm/shmem.c               24
-rw-r--r--   mm/slab_common.c         84
-rw-r--r--   mm/slub.c                73
-rw-r--r--   mm/swapfile.c            23
-rw-r--r--   mm/util.c                 4
-rw-r--r--   mm/vmalloc.c              2
-rw-r--r--   mm/zswap.c               24
25 files changed, 435 insertions, 268 deletions
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index b86ba7b0a921..f60e56150feb 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1208,6 +1208,8 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
return 0;
}
+static bool damon_sysfs_schemes_regions_updating;
+
static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
{
struct damon_target *t, *next;
@@ -1219,8 +1221,10 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
cmd = damon_sysfs_cmd_request.cmd;
if (kdamond && ctx == kdamond->damon_ctx &&
(cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS ||
- cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES)) {
+ cmd == DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_BYTES) &&
+ damon_sysfs_schemes_regions_updating) {
damon_sysfs_schemes_update_regions_stop(ctx);
+ damon_sysfs_schemes_regions_updating = false;
mutex_unlock(&damon_sysfs_lock);
}
@@ -1340,7 +1344,6 @@ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond)
static int damon_sysfs_cmd_request_callback(struct damon_ctx *c)
{
struct damon_sysfs_kdamond *kdamond;
- static bool damon_sysfs_schemes_regions_updating;
bool total_bytes_only = false;
int err = 0;
diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h
index c4b455b5ee30..dcf1ca6b31cc 100644
--- a/mm/damon/vaddr-test.h
+++ b/mm/damon/vaddr-test.h
@@ -148,6 +148,8 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
}
+
+ damon_destroy_target(t);
}
/*
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 4c81a9dbd044..cf8a9fc5c9d1 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -341,13 +341,14 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm,
bool referenced = false;
pte_t entry = huge_ptep_get(pte);
struct folio *folio = pfn_folio(pte_pfn(entry));
+ unsigned long psize = huge_page_size(hstate_vma(vma));
folio_get(folio);
if (pte_young(entry)) {
referenced = true;
entry = pte_mkold(entry);
- set_huge_pte_at(mm, addr, pte, entry);
+ set_huge_pte_at(mm, addr, pte, entry, psize);
}
#ifdef CONFIG_MMU_NOTIFIER
diff --git a/mm/filemap.c b/mm/filemap.c
index 582f5317ff71..f0a15ce1bd1b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3475,13 +3475,11 @@ skip:
*/
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
- unsigned long addr, unsigned int nr_pages)
+ unsigned long addr, unsigned int nr_pages,
+ unsigned int *mmap_miss)
{
vm_fault_t ret = 0;
- struct vm_area_struct *vma = vmf->vma;
- struct file *file = vma->vm_file;
struct page *page = folio_page(folio, start);
- unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
unsigned int count = 0;
pte_t *old_ptep = vmf->pte;
@@ -3489,8 +3487,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
if (PageHWPoison(page + count))
goto skip;
- if (mmap_miss > 0)
- mmap_miss--;
+ (*mmap_miss)++;
/*
* NOTE: If there're PTE markers, we'll leave them to be
@@ -3506,7 +3503,7 @@ skip:
if (count) {
set_pte_range(vmf, folio, page, count, addr);
folio_ref_add(folio, count);
- if (in_range(vmf->address, addr, count))
+ if (in_range(vmf->address, addr, count * PAGE_SIZE))
ret = VM_FAULT_NOPAGE;
}
@@ -3520,12 +3517,40 @@ skip:
if (count) {
set_pte_range(vmf, folio, page, count, addr);
folio_ref_add(folio, count);
- if (in_range(vmf->address, addr, count))
+ if (in_range(vmf->address, addr, count * PAGE_SIZE))
ret = VM_FAULT_NOPAGE;
}
vmf->pte = old_ptep;
- WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
+
+ return ret;
+}
+
+static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
+ struct folio *folio, unsigned long addr,
+ unsigned int *mmap_miss)
+{
+ vm_fault_t ret = 0;
+ struct page *page = &folio->page;
+
+ if (PageHWPoison(page))
+ return ret;
+
+ (*mmap_miss)++;
+
+ /*
+ * NOTE: If there're PTE markers, we'll leave them to be
+ * handled in the specific fault path, and it'll prohibit
+ * the fault-around logic.
+ */
+ if (!pte_none(ptep_get(vmf->pte)))
+ return ret;
+
+ if (vmf->address == addr)
+ ret = VM_FAULT_NOPAGE;
+
+ set_pte_range(vmf, folio, page, 1, addr);
+ folio_ref_inc(folio);
return ret;
}
@@ -3541,7 +3566,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct folio *folio;
vm_fault_t ret = 0;
- int nr_pages = 0;
+ unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
@@ -3569,25 +3594,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
end = folio->index + folio_nr_pages(folio) - 1;
nr_pages = min(end, end_pgoff) - xas.xa_index + 1;
- /*
- * NOTE: If there're PTE markers, we'll leave them to be
- * handled in the specific fault path, and it'll prohibit the
- * fault-around logic.
- */
- if (!pte_none(ptep_get(vmf->pte)))
- goto unlock;
-
- ret |= filemap_map_folio_range(vmf, folio,
- xas.xa_index - folio->index, addr, nr_pages);
+ if (!folio_test_large(folio))
+ ret |= filemap_map_order0_folio(vmf,
+ folio, addr, &mmap_miss);
+ else
+ ret |= filemap_map_folio_range(vmf, folio,
+ xas.xa_index - folio->index, addr,
+ nr_pages, &mmap_miss);
-unlock:
folio_unlock(folio);
folio_put(folio);
- folio = next_uptodate_folio(&xas, mapping, end_pgoff);
- } while (folio);
+ } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
+
+ mmap_miss_saved = READ_ONCE(file->f_ra.mmap_miss);
+ if (mmap_miss >= mmap_miss_saved)
+ WRITE_ONCE(file->f_ra.mmap_miss, 0);
+ else
+ WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss_saved - mmap_miss);
+
return ret;
}
EXPORT_SYMBOL(filemap_map_pages);
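
The filemap.c hunks above move mmap_miss accounting out of the per-folio mapping helpers: each helper now bumps a local counter for the PTEs it actually maps, and filemap_map_pages() applies one saturating update to file->f_ra.mmap_miss at the end (they also fix in_range() to compare against count * PAGE_SIZE rather than a raw page count). Below is a minimal userspace model of that final update, not part of the commit; f_ra_mmap_miss is a stand-in variable.

#include <stdio.h>

static unsigned int f_ra_mmap_miss = 100;	/* hypothetical readahead state */

/* One saturating subtraction, applied once per fault-around pass. */
static void apply_mmap_miss(unsigned int mapped)
{
	unsigned int saved = f_ra_mmap_miss;	/* READ_ONCE() in the kernel */

	if (mapped >= saved)
		f_ra_mmap_miss = 0;		/* clamp at zero, no wrap-around */
	else
		f_ra_mmap_miss = saved - mapped; /* WRITE_ONCE() in the kernel */
}

int main(void)
{
	apply_mmap_miss(7);	/* seven PTEs mapped around this fault */
	printf("mmap_miss = %u\n", f_ra_mmap_miss);	/* 93 */
	apply_mmap_miss(200);	/* more hits than the counter currently holds */
	printf("mmap_miss = %u\n", f_ra_mmap_miss);	/* 0 */
	return 0;
}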
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ba6d39b71cb1..1301ba7b2c9a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
+static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
@@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
down_read(&vma_lock->rw_sema);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ down_read(&resv_map->rw_sema);
}
}
@@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
up_read(&vma_lock->rw_sema);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ up_read(&resv_map->rw_sema);
}
}
@@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
down_write(&vma_lock->rw_sema);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ down_write(&resv_map->rw_sema);
}
}
@@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
up_write(&vma_lock->rw_sema);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ up_write(&resv_map->rw_sema);
}
}
int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
- if (!__vma_shareable_lock(vma))
- return 1;
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
- return down_write_trylock(&vma_lock->rw_sema);
+ return down_write_trylock(&vma_lock->rw_sema);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ return down_write_trylock(&resv_map->rw_sema);
+ }
+
+ return 1;
}
void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
@@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
lockdep_assert_held(&vma_lock->rw_sema);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ lockdep_assert_held(&resv_map->rw_sema);
}
}
@@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
__hugetlb_vma_unlock_write_put(vma_lock);
+ } else if (__vma_private_lock(vma)) {
+ struct resv_map *resv_map = vma_resv_map(vma);
+
+ /* no free for anon vmas, but still need to unlock */
+ up_write(&resv_map->rw_sema);
}
}
@@ -1068,6 +1100,7 @@ struct resv_map *resv_map_alloc(void)
kref_init(&resv_map->refs);
spin_lock_init(&resv_map->lock);
INIT_LIST_HEAD(&resv_map->regions);
+ init_rwsem(&resv_map->rw_sema);
resv_map->adds_in_progress = 0;
/*
@@ -1138,8 +1171,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
VM_BUG_ON_VMA(vma->vm_flags & VM_MAYSHARE, vma);
- set_vma_private_data(vma, (get_vma_private_data(vma) &
- HPAGE_RESV_MASK) | (unsigned long)map);
+ set_vma_private_data(vma, (unsigned long)map);
}
static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
@@ -4980,7 +5012,7 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
static void
hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
- struct folio *new_folio, pte_t old)
+ struct folio *new_folio, pte_t old, unsigned long sz)
{
pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
@@ -4988,7 +5020,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add
hugepage_add_new_anon_rmap(new_folio, vma, addr);
if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
newpte = huge_pte_mkuffd_wp(newpte);
- set_huge_pte_at(vma->vm_mm, addr, ptep, newpte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, newpte, sz);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
}
@@ -5065,7 +5097,7 @@ again:
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
- set_huge_pte_at(dst, addr, dst_pte, entry);
+ set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
bool uffd_wp = pte_swp_uffd_wp(entry);
@@ -5080,18 +5112,18 @@ again:
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
entry = pte_swp_mkuffd_wp(entry);
- set_huge_pte_at(src, addr, src_pte, entry);
+ set_huge_pte_at(src, addr, src_pte, entry, sz);
}
if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
- set_huge_pte_at(dst, addr, dst_pte, entry);
+ set_huge_pte_at(dst, addr, dst_pte, entry, sz);
} else if (unlikely(is_pte_marker(entry))) {
pte_marker marker = copy_pte_marker(
pte_to_swp_entry(entry), dst_vma);
if (marker)
set_huge_pte_at(dst, addr, dst_pte,
- make_pte_marker(marker));
+ make_pte_marker(marker), sz);
} else {
entry = huge_ptep_get(src_pte);
pte_folio = page_folio(pte_page(entry));
@@ -5145,7 +5177,7 @@ again:
goto again;
}
hugetlb_install_folio(dst_vma, dst_pte, addr,
- new_folio, src_pte_old);
+ new_folio, src_pte_old, sz);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
continue;
@@ -5166,7 +5198,7 @@ again:
if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
- set_huge_pte_at(dst, addr, dst_pte, entry);
+ set_huge_pte_at(dst, addr, dst_pte, entry, sz);
hugetlb_count_add(npages, dst);
}
spin_unlock(src_ptl);
@@ -5184,7 +5216,8 @@ again:
}
static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte)
+ unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
+ unsigned long sz)
{
struct hstate *h = hstate_vma(vma);
struct mm_struct *mm = vma->vm_mm;
@@ -5202,7 +5235,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
- set_huge_pte_at(mm, new_addr, dst_pte, pte);
+ set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
if (src_ptl != dst_ptl)
spin_unlock(src_ptl);
@@ -5259,7 +5292,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
if (!dst_pte)
break;
- move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte);
+ move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz);
}
if (shared_pmd)
@@ -5273,9 +5306,9 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
return len + old_addr - old_end;
}
-static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- struct page *ref_page, zap_flags_t zap_flags)
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct page *ref_page, zap_flags_t zap_flags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
@@ -5337,7 +5370,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
if (pte_swp_uffd_wp_any(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
- make_pte_marker(PTE_MARKER_UFFD_WP));
+ make_pte_marker(PTE_MARKER_UFFD_WP),
+ sz);
else
huge_pte_clear(mm, address, ptep, sz);
spin_unlock(ptl);
@@ -5371,7 +5405,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
if (huge_pte_uffd_wp(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
- make_pte_marker(PTE_MARKER_UFFD_WP));
+ make_pte_marker(PTE_MARKER_UFFD_WP),
+ sz);
hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page, vma, true);
@@ -5402,16 +5437,25 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
tlb_flush_mmu_tlbonly(tlb);
}
-void __unmap_hugepage_range_final(struct mmu_gather *tlb,
- struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page,
- zap_flags_t zap_flags)
+void __hugetlb_zap_begin(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
{
+ if (!vma->vm_file) /* hugetlbfs_file_mmap error */
+ return;
+
+ adjust_range_if_pmd_sharing_possible(vma, start, end);
hugetlb_vma_lock_write(vma);
- i_mmap_lock_write(vma->vm_file->f_mapping);
+ if (vma->vm_file)
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+}
- /* mmu notification performed in caller */
- __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
+void __hugetlb_zap_end(struct vm_area_struct *vma,
+ struct zap_details *details)
+{
+ zap_flags_t zap_flags = details ? details->zap_flags : 0;
+
+ if (!vma->vm_file) /* hugetlbfs_file_mmap error */
+ return;
if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */
/*
@@ -5424,11 +5468,12 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
* someone else.
*/
__hugetlb_vma_unlock_write_free(vma);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
} else {
- i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
}
+
+ if (vma->vm_file)
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
@@ -5676,7 +5721,7 @@ retry_avoidcopy:
hugepage_add_new_anon_rmap(new_folio, vma, haddr);
if (huge_pte_uffd_wp(pte))
newpte = huge_pte_mkuffd_wp(newpte);
- set_huge_pte_at(mm, haddr, ptep, newpte);
+ set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h));
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = old_folio;
@@ -5972,7 +6017,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
*/
if (unlikely(pte_marker_uffd_wp(old_pte)))
new_pte = huge_pte_mkuffd_wp(new_pte);
- set_huge_pte_at(mm, haddr, ptep, new_pte);
+ set_huge_pte_at(mm, haddr, ptep, new_pte, huge_page_size(h));
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
@@ -6261,7 +6306,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
}
_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
- set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+ set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte,
+ huge_page_size(h));
/* No need to invalidate - it was non-present before */
update_mmu_cache(dst_vma, dst_addr, dst_pte);
@@ -6412,7 +6458,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
if (wp_enabled)
_dst_pte = huge_pte_mkuffd_wp(_dst_pte);
- set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+ set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, huge_page_size(h));
hugetlb_count_add(pages_per_huge_page(h), dst_mm);
@@ -6598,7 +6644,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
else if (uffd_wp_resolve)
newpte = pte_swp_clear_uffd_wp(newpte);
if (!pte_same(pte, newpte))
- set_huge_pte_at(mm, address, ptep, newpte);
+ set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
/* No other markers apply for now. */
WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
@@ -6623,7 +6669,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
if (unlikely(uffd_wp))
/* Safe to modify directly (none->non-present). */
set_huge_pte_at(mm, address, ptep,
- make_pte_marker(PTE_MARKER_UFFD_WP));
+ make_pte_marker(PTE_MARKER_UFFD_WP),
+ psize);
}
spin_unlock(ptl);
}
@@ -6806,8 +6853,10 @@ out_err:
*/
if (chg >= 0 && add < 0)
region_abort(resv_map, from, to, regions_needed);
- if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+ if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
kref_put(&resv_map->refs, resv_map_release);
+ set_vma_resv_map(vma, NULL);
+ }
return false;
}
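
Beyond threading the mapping size into set_huge_pte_at(), the hugetlb.c hunks teach the hugetlb_vma_lock_*() helpers to fall back to the reservation map's new rw_sema for private mappings that carry a resv_map. The following is a compilable userspace sketch of that dispatch using pthread rwlocks and simplified stand-in types; the real __vma_shareable_lock()/__vma_private_lock() predicates and struct layouts differ.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct resv_map { pthread_rwlock_t rw_sema; };		/* rw_sema added by this series */
struct hugetlb_vma_lock { pthread_rwlock_t rw_sema; };

struct vma {
	bool shareable;				/* models __vma_shareable_lock() */
	struct hugetlb_vma_lock *vma_lock;	/* shared (VM_MAYSHARE) mappings */
	struct resv_map *resv;			/* models __vma_private_lock() */
};

static void hugetlb_vma_lock_read(struct vma *vma)
{
	if (vma->shareable && vma->vma_lock)
		pthread_rwlock_rdlock(&vma->vma_lock->rw_sema);
	else if (vma->resv)			/* new: private mapping with a reservation map */
		pthread_rwlock_rdlock(&vma->resv->rw_sema);
	/* else: nothing to lock */
}

static void hugetlb_vma_unlock_read(struct vma *vma)
{
	if (vma->shareable && vma->vma_lock)
		pthread_rwlock_unlock(&vma->vma_lock->rw_sema);
	else if (vma->resv)
		pthread_rwlock_unlock(&vma->resv->rw_sema);
}

int main(void)
{
	struct resv_map map;
	struct vma v = { .shareable = false, .vma_lock = NULL, .resv = &map };

	pthread_rwlock_init(&map.rw_sema, NULL);
	hugetlb_vma_lock_read(&v);	/* falls back to resv_map->rw_sema */
	hugetlb_vma_unlock_read(&v);
	pthread_rwlock_destroy(&map.rw_sema);
	return 0;
}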
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index f70e3d7a602e..8b06bab5c406 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -291,7 +291,7 @@ struct kasan_stack_ring {
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-#ifndef __HAVE_ARCH_SHADOW_MAP
+#ifndef kasan_shadow_to_mem
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
@@ -299,15 +299,13 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
}
#endif
+#ifndef addr_has_metadata
static __always_inline bool addr_has_metadata(const void *addr)
{
-#ifdef __HAVE_ARCH_SHADOW_MAP
- return (kasan_mem_to_shadow((void *)addr) != NULL);
-#else
return (kasan_reset_tag(addr) >=
kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
-#endif
}
+#endif
/**
* kasan_check_range - Check memory region, and report if invalid access.
@@ -564,7 +562,6 @@ void kasan_restore_multi_shot(bool enabled);
* code. Declared here to avoid warnings about missing declarations.
*/
-asmlinkage void kasan_unpoison_task_stack_below(const void *watermark);
void __asan_register_globals(void *globals, ssize_t size);
void __asan_unregister_globals(void *globals, ssize_t size);
void __asan_handle_no_return(void);
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index ca4b6ff080a6..6e3cb118d20e 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -621,7 +621,7 @@ void kasan_report_async(void)
}
#endif /* CONFIG_KASAN_HW_TAGS */
-#ifdef CONFIG_KASAN_INLINE
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
/*
* With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high
* canonical half of the address space) cause out-of-bounds shadow memory reads
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a4d3282493b6..5b009b233ab8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2555,7 +2555,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
* Scheduled by try_charge() to be executed from the userland return path
* and reclaims memory over the high limit.
*/
-void mem_cgroup_handle_over_high(void)
+void mem_cgroup_handle_over_high(gfp_t gfp_mask)
{
unsigned long penalty_jiffies;
unsigned long pflags;
@@ -2583,7 +2583,7 @@ retry_reclaim:
*/
nr_reclaimed = reclaim_high(memcg,
in_retry ? SWAP_CLUSTER_MAX : nr_pages,
- GFP_KERNEL);
+ gfp_mask);
/*
* memory.high is breached and reclaim is unable to keep up. Throttle
@@ -2819,7 +2819,7 @@ done_restock:
if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH &&
!(current->flags & PF_MEMALLOC) &&
gfpflags_allow_blocking(gfp_mask)) {
- mem_cgroup_handle_over_high();
+ mem_cgroup_handle_over_high(gfp_mask);
}
return 0;
}
@@ -3867,6 +3867,13 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
case _MEMSWAP:
ret = mem_cgroup_resize_max(memcg, nr_pages, true);
break;
+ case _KMEM:
+ pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. "
+ "Writing any value to this file has no effect. "
+ "Please report your usecase to linux-mm@kvack.org if you "
+ "depend on this functionality.\n");
+ ret = 0;
+ break;
case _TCP:
ret = memcg_update_tcp_max(memcg, nr_pages);
break;
@@ -5078,6 +5085,12 @@ static struct cftype mem_cgroup_legacy_files[] = {
},
#endif
{
+ .name = "kmem.limit_in_bytes",
+ .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
+ .write = mem_cgroup_write,
+ .read_u64 = mem_cgroup_read_u64,
+ },
+ {
.name = "kmem.usage_in_bytes",
.private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
.read_u64 = mem_cgroup_read_u64,
diff --git a/mm/memory.c b/mm/memory.c
index 6c264d2f969c..517221f01303 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1683,7 +1683,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
if (vma->vm_file) {
zap_flags_t zap_flags = details ?
details->zap_flags : 0;
- __unmap_hugepage_range_final(tlb, vma, start, end,
+ __unmap_hugepage_range(tlb, vma, start, end,
NULL, zap_flags);
}
} else
@@ -1728,8 +1728,12 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
start_addr, end_addr);
mmu_notifier_invalidate_range_start(&range);
do {
- unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
+ unsigned long start = start_addr;
+ unsigned long end = end_addr;
+ hugetlb_zap_begin(vma, &start, &end);
+ unmap_single_vma(tlb, vma, start, end, &details,
mm_wr_locked);
+ hugetlb_zap_end(vma, &details);
} while ((vma = mas_find(mas, tree_end - 1)) != NULL);
mmu_notifier_invalidate_range_end(&range);
}
@@ -1753,9 +1757,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
address, end);
- if (is_vm_hugetlb_page(vma))
- adjust_range_if_pmd_sharing_possible(vma, &range.start,
- &range.end);
+ hugetlb_zap_begin(vma, &range.start, &range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
@@ -1766,6 +1768,7 @@ void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
unmap_single_vma(&tlb, vma, address, end, details, false);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
+ hugetlb_zap_end(vma, details);
}
/**
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 42b5567e3773..e52e3a0b8f2e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -131,22 +131,26 @@ static struct mempolicy default_policy = {
static struct mempolicy preferred_node_policy[MAX_NUMNODES];
/**
- * numa_map_to_online_node - Find closest online node
+ * numa_nearest_node - Find nearest node by state
* @node: Node id to start the search
+ * @state: State to filter the search
*
- * Lookup the next closest node by distance if @nid is not online.
+ * Lookup the closest node by distance if @nid is not in state.
*
- * Return: this @node if it is online, otherwise the closest node by distance
+ * Return: this @node if it is in state, otherwise the closest node by distance
*/
-int numa_map_to_online_node(int node)
+int numa_nearest_node(int node, unsigned int state)
{
int min_dist = INT_MAX, dist, n, min_node;
- if (node == NUMA_NO_NODE || node_online(node))
+ if (state >= NR_NODE_STATES)
+ return -EINVAL;
+
+ if (node == NUMA_NO_NODE || node_state(node, state))
return node;
min_node = node;
- for_each_online_node(n) {
+ for_each_node_state(n, state) {
dist = node_distance(node, n);
if (dist < min_dist) {
min_dist = dist;
@@ -156,7 +160,7 @@ int numa_map_to_online_node(int node)
return min_node;
}
-EXPORT_SYMBOL_GPL(numa_map_to_online_node);
+EXPORT_SYMBOL_GPL(numa_nearest_node);
struct mempolicy *get_task_policy(struct task_struct *p)
{
@@ -426,6 +430,7 @@ struct queue_pages {
unsigned long start;
unsigned long end;
struct vm_area_struct *first;
+ bool has_unmovable;
};
/*
@@ -446,9 +451,8 @@ static inline bool queue_folio_required(struct folio *folio,
/*
* queue_folios_pmd() has three possible return values:
* 0 - folios are placed on the right node or queued successfully, or
- * special page is met, i.e. huge zero page.
- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
- * specified.
+ * special page is met, i.e. zero page, or unmovable page is found
+ * but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
* existing folio was already on a node that does not follow the
* policy.
@@ -479,7 +483,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
if (!vma_migratable(walk->vma) ||
migrate_folio_add(folio, qp->pagelist, flags)) {
- ret = 1;
+ qp->has_unmovable = true;
goto unlock;
}
} else
@@ -495,9 +499,8 @@ unlock:
*
* queue_folios_pte_range() has three possible return values:
* 0 - folios are placed on the right node or queued successfully, or
- * special page is met, i.e. zero page.
- * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
- * specified.
+ * special page is met, i.e. zero page, or unmovable page is found
+ * but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
* on a node that does not follow the policy.
*/
@@ -508,7 +511,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
struct folio *folio;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
- bool has_unmovable = false;
pte_t *pte, *mapped_pte;
pte_t ptent;
spinlock_t *ptl;
@@ -538,11 +540,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
if (!queue_folio_required(folio, qp))
continue;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
- /* MPOL_MF_STRICT must be specified if we get here */
- if (!vma_migratable(vma)) {
- has_unmovable = true;
- break;
- }
+ /*
+ * MPOL_MF_STRICT must be specified if we get here.
+ * Continue walking vmas due to MPOL_MF_MOVE* flags.
+ */
+ if (!vma_migratable(vma))
+ qp->has_unmovable = true;
/*
* Do not abort immediately since there may be
@@ -550,16 +553,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
* need migrate other LRU pages.
*/
if (migrate_folio_add(folio, qp->pagelist, flags))
- has_unmovable = true;
+ qp->has_unmovable = true;
} else
break;
}
pte_unmap_unlock(mapped_pte, ptl);
cond_resched();
- if (has_unmovable)
- return 1;
-
return addr != end ? -EIO : 0;
}
@@ -599,7 +599,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
* Detecting misplaced folio but allow migrating folios which
* have been queued.
*/
- ret = 1;
+ qp->has_unmovable = true;
goto unlock;
}
@@ -620,7 +620,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
* Failed to isolate folio but allow migrating pages
* which have been queued.
*/
- ret = 1;
+ qp->has_unmovable = true;
}
unlock:
spin_unlock(ptl);
@@ -756,12 +756,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
.start = start,
.end = end,
.first = NULL,
+ .has_unmovable = false,
};
const struct mm_walk_ops *ops = lock_vma ?
&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
err = walk_page_range(mm, start, end, ops, &qp);
+ if (qp.has_unmovable)
+ err = 1;
if (!qp.first)
/* whole range in hole */
err = -EFAULT;
@@ -1358,7 +1361,7 @@ static long do_mbind(unsigned long start, unsigned long len,
putback_movable_pages(&pagelist);
}
- if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
+ if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
err = -EIO;
} else {
up_out:
@@ -1544,8 +1547,10 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
* the home node for vmas we already updated before.
*/
old = vma_policy(vma);
- if (!old)
+ if (!old) {
+ prev = vma;
continue;
+ }
if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) {
err = -EOPNOTSUPP;
break;
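
The mempolicy.c change generalises numa_map_to_online_node() into numa_nearest_node(node, state): instead of scanning online nodes it scans nodes in the requested state and keeps the one with the smallest node_distance(). A self-contained model of that search over a hypothetical distance table follows; the real kernel iterates the node_states[] bitmaps rather than a fixed array.

#include <stdio.h>

#define NR_NODES 4
#define INT_MAX_DIST 0x7fffffff

/* Hypothetical symmetric distance table, diagonal = local access. */
static const int node_distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 40, 40 },
	{ 20, 10, 40, 40 },
	{ 40, 40, 10, 20 },
	{ 40, 40, 20, 10 },
};

/* Models for_each_node_state(): which nodes are in the wanted state. */
static const int node_in_state[NR_NODES] = { 1, 0, 1, 0 };

static int numa_nearest_node(int node)
{
	int min_dist = INT_MAX_DIST, min_node = node;

	if (node_in_state[node])
		return node;		/* already in the requested state */

	for (int n = 0; n < NR_NODES; n++) {
		if (!node_in_state[n])
			continue;
		if (node_distance[node][n] < min_dist) {
			min_dist = node_distance[node][n];
			min_node = n;
		}
	}
	return min_node;
}

int main(void)
{
	printf("nearest to node 1: %d\n", numa_nearest_node(1));	/* 0 */
	printf("nearest to node 3: %d\n", numa_nearest_node(3));	/* 2 */
	return 0;
}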
diff --git a/mm/migrate.c b/mm/migrate.c
index b7fa020003f3..06086dc9da28 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -243,7 +243,9 @@ static bool remove_migration_pte(struct folio *folio,
#ifdef CONFIG_HUGETLB_PAGE
if (folio_test_hugetlb(folio)) {
- unsigned int shift = huge_page_shift(hstate_vma(vma));
+ struct hstate *h = hstate_vma(vma);
+ unsigned int shift = huge_page_shift(h);
+ unsigned long psize = huge_page_size(h);
pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
if (folio_test_anon(folio))
@@ -251,7 +253,8 @@ static bool remove_migration_pte(struct folio *folio,
rmap_flags);
else
page_dup_file_rmap(new, true);
- set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
+ set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte,
+ psize);
} else
#endif
{
@@ -2159,6 +2162,7 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
const int __user *nodes,
int __user *status, int flags)
{
+ compat_uptr_t __user *compat_pages = (void __user *)pages;
int current_node = NUMA_NO_NODE;
LIST_HEAD(pagelist);
int start, i;
@@ -2171,8 +2175,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
int node;
err = -EFAULT;
- if (get_user(p, pages + i))
- goto out_flush;
+ if (in_compat_syscall()) {
+ compat_uptr_t cp;
+
+ if (get_user(cp, compat_pages + i))
+ goto out_flush;
+
+ p = compat_ptr(cp);
+ } else {
+ if (get_user(p, pages + i))
+ goto out_flush;
+ }
if (get_user(node, nodes + i))
goto out_flush;
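
The do_pages_move() hunk fixes move_pages() for compat (32-bit) callers: their 'pages' array holds 32-bit pointers, so the kernel must read a compat_uptr_t and widen it rather than stepping through the array in native pointer strides. A userspace illustration of why the element width matters (buffers and values are made up for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* A 32-bit caller passes an array of 32-bit user pointers... */
	uint32_t compat_pages[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };

	/* ...which a 64-bit kernel only sees as an opaque user address. */
	const unsigned char *pages = (const unsigned char *)compat_pages;

	/* Wrong: stepping by native pointer size lands between entries. */
	uint64_t wrong;
	memcpy(&wrong, pages + 1 * sizeof(uint64_t), sizeof(wrong));

	/* Right (what the patch does): step by compat_uptr_t and widen. */
	uint32_t cp;
	memcpy(&cp, pages + 1 * sizeof(uint32_t), sizeof(cp));
	uint64_t right = cp;		/* compat_ptr(cp) in the kernel */

	printf("entry 1: wrong=%#llx right=%#llx\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}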
diff --git a/mm/mmap.c b/mm/mmap.c
index 4b0eaf6427c4..da2e3bd6dba1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -583,11 +583,12 @@ again:
* dup_anon_vma() - Helper function to duplicate anon_vma
* @dst: The destination VMA
* @src: The source VMA
+ * @dup: Pointer to the destination VMA when successful.
*
* Returns: 0 on success.
*/
static inline int dup_anon_vma(struct vm_area_struct *dst,
- struct vm_area_struct *src)
+ struct vm_area_struct *src, struct vm_area_struct **dup)
{
/*
* Easily overlooked: when mprotect shifts the boundary, make sure the
@@ -595,9 +596,15 @@ static inline int dup_anon_vma(struct vm_area_struct *dst,
* anon pages imported.
*/
if (src->anon_vma && !dst->anon_vma) {
+ int ret;
+
vma_assert_write_locked(dst);
dst->anon_vma = src->anon_vma;
- return anon_vma_clone(dst, src);
+ ret = anon_vma_clone(dst, src);
+ if (ret)
+ return ret;
+
+ *dup = dst;
}
return 0;
@@ -624,6 +631,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff,
struct vm_area_struct *next)
{
+ struct vm_area_struct *anon_dup = NULL;
bool remove_next = false;
struct vma_prepare vp;
@@ -633,7 +641,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
remove_next = true;
vma_start_write(next);
- ret = dup_anon_vma(vma, next);
+ ret = dup_anon_vma(vma, next, &anon_dup);
if (ret)
return ret;
}
@@ -661,6 +669,8 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
return 0;
nomem:
+ if (anon_dup)
+ unlink_anon_vmas(anon_dup);
return -ENOMEM;
}
@@ -860,6 +870,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
{
struct vm_area_struct *curr, *next, *res;
struct vm_area_struct *vma, *adjust, *remove, *remove2;
+ struct vm_area_struct *anon_dup = NULL;
struct vma_prepare vp;
pgoff_t vma_pgoff;
int err = 0;
@@ -927,18 +938,18 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_start_write(next);
remove = next; /* case 1 */
vma_end = next->vm_end;
- err = dup_anon_vma(prev, next);
+ err = dup_anon_vma(prev, next, &anon_dup);
if (curr) { /* case 6 */
vma_start_write(curr);
remove = curr;
remove2 = next;
if (!next->anon_vma)
- err = dup_anon_vma(prev, curr);
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr);
+ err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
remove = curr;
} else { /* case 5 */
@@ -954,7 +965,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_end = addr;
adjust = next;
adj_start = -(prev->vm_end - addr);
- err = dup_anon_vma(next, prev);
+ err = dup_anon_vma(next, prev, &anon_dup);
} else {
/*
* Note that cases 3 and 8 are the ONLY ones where prev
@@ -968,14 +979,14 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_pgoff = curr->vm_pgoff;
vma_start_write(curr);
remove = curr;
- err = dup_anon_vma(next, curr);
+ err = dup_anon_vma(next, curr, &anon_dup);
}
}
}
/* Error in anon_vma clone. */
if (err)
- return NULL;
+ goto anon_vma_fail;
if (vma_start < vma->vm_start || vma_end > vma->vm_end)
vma_expanded = true;
@@ -988,7 +999,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
}
if (vma_iter_prealloc(vmi, vma))
- return NULL;
+ goto prealloc_fail;
init_multi_vma_prep(&vp, vma, adjust, remove, remove2);
VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma &&
@@ -1016,6 +1027,15 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
vma_complete(&vp, vmi, mm);
khugepaged_enter_vma(res, vm_flags);
return res;
+
+prealloc_fail:
+ if (anon_dup)
+ unlink_anon_vmas(anon_dup);
+
+anon_vma_fail:
+ vma_iter_set(vmi, addr);
+ vma_iter_load(vmi);
+ return NULL;
}
/*
@@ -3143,13 +3163,13 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (!len)
return 0;
- if (mmap_write_lock_killable(mm))
- return -EINTR;
-
/* Until we need other flags, refuse anything except VM_EXEC. */
if ((flags & (~VM_EXEC)) != 0)
return -EINVAL;
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
ret = check_brk_limits(addr, len);
if (ret)
goto limits_failed;
@@ -3174,12 +3194,6 @@ limits_failed:
}
EXPORT_SYMBOL(vm_brk_flags);
-int vm_brk(unsigned long addr, unsigned long len)
-{
- return vm_brk_flags(addr, len, 0);
-}
-EXPORT_SYMBOL(vm_brk);
-
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
diff --git a/mm/mremap.c b/mm/mremap.c
index 056478c106ee..382e81c33fc4 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -715,7 +715,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
}
vma_iter_init(&vmi, mm, old_addr);
- if (!do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false)) {
+ if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) {
/* OOM: unable to split vma, just get accounts right */
if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
vm_acct_memory(old_len >> PAGE_SHIFT);
diff --git a/mm/nommu.c b/mm/nommu.c
index 7f9e9e5a0e12..23c43c208f2b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1531,11 +1531,6 @@ void exit_mmap(struct mm_struct *mm)
mmap_write_unlock(mm);
}
-int vm_brk(unsigned long addr, unsigned long len)
-{
- return -ENOMEM;
-}
-
/*
* expand (or shrink) an existing mapping, potentially moving it at the same
* time (controlled by the MREMAP_MAYMOVE flag and available VM space)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0c5be12f9336..85741403948f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2400,7 +2400,7 @@ void free_unref_page(struct page *page, unsigned int order)
struct per_cpu_pages *pcp;
struct zone *zone;
unsigned long pfn = page_to_pfn(page);
- int migratetype;
+ int migratetype, pcpmigratetype;
if (!free_unref_page_prepare(page, pfn, order))
return;
@@ -2408,24 +2408,24 @@ void free_unref_page(struct page *page, unsigned int order)
/*
* We only track unmovable, reclaimable and movable on pcp lists.
* Place ISOLATE pages on the isolated list because they are being
- * offlined but treat HIGHATOMIC as movable pages so we can get those
- * areas back if necessary. Otherwise, we may have to free
+ * offlined but treat HIGHATOMIC and CMA as movable pages so we can
+ * get those areas back if necessary. Otherwise, we may have to free
* excessively into the page allocator
*/
- migratetype = get_pcppage_migratetype(page);
+ migratetype = pcpmigratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
if (unlikely(is_migrate_isolate(migratetype))) {
free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
return;
}
- migratetype = MIGRATE_MOVABLE;
+ pcpmigratetype = MIGRATE_MOVABLE;
}
zone = page_zone(page);
pcp_trylock_prepare(UP_flags);
pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (pcp) {
- free_unref_page_commit(zone, pcp, page, migratetype, order);
+ free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
pcp_spin_unlock(pcp);
} else {
free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
@@ -6475,6 +6475,7 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
next_page = page;
current_buddy = page + size;
}
+ page = next_page;
if (set_page_guard(zone, current_buddy, high, migratetype))
continue;
@@ -6482,7 +6483,6 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
if (current_buddy != target) {
add_to_free_list(current_buddy, zone, high, migratetype);
set_buddy_order(current_buddy, high);
- page = next_page;
}
}
}
diff --git a/mm/percpu.c b/mm/percpu.c
index a7665de8485f..60ed078e4cd0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2245,6 +2245,37 @@ static void pcpu_balance_workfn(struct work_struct *work)
}
/**
+ * pcpu_alloc_size - the size of the dynamic percpu area
+ * @ptr: pointer to the dynamic percpu area
+ *
+ * Returns the size of the @ptr allocation. This is undefined for statically
+ * defined percpu variables as there is no corresponding chunk->bound_map.
+ *
+ * RETURNS:
+ * The size of the dynamic percpu area.
+ *
+ * CONTEXT:
+ * Can be called from atomic context.
+ */
+size_t pcpu_alloc_size(void __percpu *ptr)
+{
+ struct pcpu_chunk *chunk;
+ unsigned long bit_off, end;
+ void *addr;
+
+ if (!ptr)
+ return 0;
+
+ addr = __pcpu_ptr_to_addr(ptr);
+ /* No pcpu_lock here: ptr has not been freed, so chunk is still alive */
+ chunk = pcpu_chunk_addr_search(addr);
+ bit_off = (addr - chunk->base_addr) / PCPU_MIN_ALLOC_SIZE;
+ end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
+ bit_off + 1);
+ return (end - bit_off) * PCPU_MIN_ALLOC_SIZE;
+}
+
+/**
* free_percpu - free percpu area
* @ptr: pointer to area to free
*
@@ -2267,12 +2298,10 @@ void free_percpu(void __percpu *ptr)
kmemleak_free_percpu(ptr);
addr = __pcpu_ptr_to_addr(ptr);
-
- spin_lock_irqsave(&pcpu_lock, flags);
-
chunk = pcpu_chunk_addr_search(addr);
off = addr - chunk->base_addr;
+ spin_lock_irqsave(&pcpu_lock, flags);
size = pcpu_free_area(chunk, off);
pcpu_memcg_free_hook(chunk, off, size);
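
pcpu_alloc_size() recovers an allocation's length by locating its start bit in the chunk's bound_map and finding the next set bit, so the size never needs to be stored separately. Below is a small standalone model of that bounds-bitmap lookup, not the percpu implementation itself: a plain uint64_t stands in for chunk->bound_map and a 4-byte unit for PCPU_MIN_ALLOC_SIZE.

#include <stdint.h>
#include <stdio.h>

#define UNIT 4	/* stand-in for PCPU_MIN_ALLOC_SIZE */

/* Each allocation sets a bit at its first unit and at the unit after it. */
static uint64_t bound_map;

static void mark_alloc(unsigned int bit_off, unsigned int units)
{
	bound_map |= 1ULL << bit_off;
	bound_map |= 1ULL << (bit_off + units);
}

/* Models find_next_bit(): first set bit at or above 'from'. */
static unsigned int find_next_bit64(uint64_t map, unsigned int from)
{
	for (unsigned int i = from; i < 64; i++)
		if (map & (1ULL << i))
			return i;
	return 64;
}

static size_t alloc_size(unsigned int bit_off)
{
	unsigned int end = find_next_bit64(bound_map, bit_off + 1);

	return (size_t)(end - bit_off) * UNIT;
}

int main(void)
{
	mark_alloc(0, 3);	/* 12-byte allocation at offset 0 */
	mark_alloc(3, 5);	/* 20-byte allocation right after it */
	printf("%zu %zu\n", alloc_size(0), alloc_size(3));	/* 12 20 */
	return 0;
}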
diff --git a/mm/readahead.c b/mm/readahead.c
index e815c114de21..6925e6959fd3 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -735,7 +735,8 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
*/
ret = -EINVAL;
if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
- !S_ISREG(file_inode(f.file)->i_mode))
+ (!S_ISREG(file_inode(f.file)->i_mode) &&
+ !S_ISBLK(file_inode(f.file)->i_mode)))
goto out;
ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
diff --git a/mm/rmap.c b/mm/rmap.c
index ec7f8e6c9e48..9f795b93cf40 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1480,6 +1480,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
unsigned long pfn;
+ unsigned long hsz = 0;
/*
* When racing against e.g. zap_pte_range() on another cpu,
@@ -1511,6 +1512,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
*/
adjust_range_if_pmd_sharing_possible(vma, &range.start,
&range.end);
+
+ /* We need the huge page size for set_huge_pte_at() */
+ hsz = huge_page_size(hstate_vma(vma));
}
mmu_notifier_invalidate_range_start(&range);
@@ -1628,7 +1632,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
hugetlb_count_sub(folio_nr_pages(folio), mm);
- set_huge_pte_at(mm, address, pvmw.pte, pteval);
+ set_huge_pte_at(mm, address, pvmw.pte, pteval,
+ hsz);
} else {
dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
@@ -1820,6 +1825,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
struct mmu_notifier_range range;
enum ttu_flags flags = (enum ttu_flags)(long)arg;
unsigned long pfn;
+ unsigned long hsz = 0;
/*
* When racing against e.g. zap_pte_range() on another cpu,
@@ -1855,6 +1861,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
*/
adjust_range_if_pmd_sharing_possible(vma, &range.start,
&range.end);
+
+ /* We need the huge page size for set_huge_pte_at() */
+ hsz = huge_page_size(hstate_vma(vma));
}
mmu_notifier_invalidate_range_start(&range);
@@ -2020,7 +2029,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
hugetlb_count_sub(folio_nr_pages(folio), mm);
- set_huge_pte_at(mm, address, pvmw.pte, pteval);
+ set_huge_pte_at(mm, address, pvmw.pte, pteval,
+ hsz);
} else {
dec_mm_counter(mm, mm_counter(&folio->page));
set_pte_at(mm, address, pvmw.pte, pteval);
@@ -2044,7 +2054,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (arch_unmap_one(mm, vma, address, pteval) < 0) {
if (folio_test_hugetlb(folio))
- set_huge_pte_at(mm, address, pvmw.pte, pteval);
+ set_huge_pte_at(mm, address, pvmw.pte,
+ pteval, hsz);
else
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
@@ -2058,7 +2069,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (anon_exclusive &&
page_try_share_anon_rmap(subpage)) {
if (folio_test_hugetlb(folio))
- set_huge_pte_at(mm, address, pvmw.pte, pteval);
+ set_huge_pte_at(mm, address, pvmw.pte,
+ pteval, hsz);
else
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
@@ -2090,7 +2102,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_uffd_wp(pteval))
swp_pte = pte_swp_mkuffd_wp(swp_pte);
if (folio_test_hugetlb(folio))
- set_huge_pte_at(mm, address, pvmw.pte, swp_pte);
+ set_huge_pte_at(mm, address, pvmw.pte, swp_pte,
+ hsz);
else
set_pte_at(mm, address, pvmw.pte, swp_pte);
trace_set_migration_pte(address, pte_val(swp_pte),
diff --git a/mm/shmem.c b/mm/shmem.c
index 02e62fccc80d..6b102965d355 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1112,7 +1112,7 @@ whole_folios:
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
shmem_undo_range(inode, lstart, lend, false);
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
inode_inc_iversion(inode);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -1224,7 +1224,7 @@ static int shmem_setattr(struct mnt_idmap *idmap,
if (!error && update_ctime) {
inode_set_ctime_current(inode);
if (update_mtime)
- inode->i_mtime = inode_get_ctime(inode);
+ inode_set_mtime_to_ts(inode, inode_get_ctime(inode));
inode_inc_iversion(inode);
}
return error;
@@ -2455,7 +2455,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
inode->i_ino = ino;
inode_init_owner(idmap, inode, dir, mode);
inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
inode->i_generation = get_random_u32();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
@@ -2463,7 +2463,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
- info->i_crtime = inode->i_mtime;
+ info->i_crtime = inode_get_mtime(inode);
info->fsflags = (dir == NULL) ? 0 :
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
if (info->fsflags)
@@ -3229,7 +3229,7 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
goto out_iput;
dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
@@ -3318,8 +3318,8 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
}
dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_mtime = inode_set_ctime_to_ts(dir,
- inode_set_ctime_current(inode));
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
inode_inc_iversion(dir);
inc_nlink(inode);
ihold(inode); /* New dentry reference */
@@ -3339,8 +3339,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
dir->i_size -= BOGO_DIRENT_SIZE;
- dir->i_mtime = inode_set_ctime_to_ts(dir,
- inode_set_ctime_current(inode));
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
inode_inc_iversion(dir);
drop_nlink(inode);
dput(dentry); /* Undo the count from "create" - this does all the work */
@@ -3488,7 +3488,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
folio_put(folio);
}
dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry);
@@ -3714,7 +3714,7 @@ static const struct xattr_handler shmem_user_xattr_handler = {
.set = shmem_xattr_handler_set,
};
-static const struct xattr_handler *shmem_xattr_handlers[] = {
+static const struct xattr_handler * const shmem_xattr_handlers[] = {
&shmem_security_xattr_handler,
&shmem_trusted_xattr_handler,
&shmem_user_xattr_handler,
@@ -4586,7 +4586,7 @@ static struct file_system_type shmem_fs_type = {
#endif
.kill_sb = kill_litter_super,
#ifdef CONFIG_SHMEM
- .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME,
+ .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
#else
.fs_flags = FS_USERNS_MOUNT,
#endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index cd71f9581e67..8d431193c273 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -479,7 +479,7 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s)
{
- int refcnt;
+ int err = -EBUSY;
bool rcu_set;
if (unlikely(!s) || !kasan_check_byte(s))
@@ -490,17 +490,17 @@ void kmem_cache_destroy(struct kmem_cache *s)
rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU;
- refcnt = --s->refcount;
- if (refcnt)
+ s->refcount--;
+ if (s->refcount)
goto out_unlock;
- WARN(shutdown_cache(s),
- "%s %s: Slab cache still has objects when called from %pS",
+ err = shutdown_cache(s);
+ WARN(err, "%s %s: Slab cache still has objects when called from %pS",
__func__, s->name, (void *)_RET_IP_);
out_unlock:
mutex_unlock(&slab_mutex);
cpus_read_unlock();
- if (!refcnt && !rcu_set)
+ if (!err && !rcu_set)
kmem_cache_release(s);
}
EXPORT_SYMBOL(kmem_cache_destroy);
@@ -528,26 +528,6 @@ bool slab_is_available(void)
}
#ifdef CONFIG_PRINTK
-/**
- * kmem_valid_obj - does the pointer reference a valid slab object?
- * @object: pointer to query.
- *
- * Return: %true if the pointer is to a not-yet-freed object from
- * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer
- * is to an already-freed object, and %false otherwise.
- */
-bool kmem_valid_obj(void *object)
-{
- struct folio *folio;
-
- /* Some arches consider ZERO_SIZE_PTR to be a valid address. */
- if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
- return false;
- folio = virt_to_folio(object);
- return folio_test_slab(folio);
-}
-EXPORT_SYMBOL_GPL(kmem_valid_obj);
-
static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
if (__kfence_obj_info(kpp, object, slab))
@@ -566,11 +546,11 @@ static void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *
* and, if available, the slab name, return address, and stack trace from
* the allocation and last free path of that object.
*
- * This function will splat if passed a pointer to a non-slab object.
- * If you are not sure what type of object you have, you should instead
- * use mem_dump_obj().
+ * Return: %true if the pointer is to a not-yet-freed object from
+ * kmalloc() or kmem_cache_alloc(), either %true or %false if the pointer
+ * is to an already-freed object, and %false otherwise.
*/
-void kmem_dump_obj(void *object)
+bool kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
@@ -578,13 +558,13 @@ void kmem_dump_obj(void *object)
unsigned long ptroffset;
struct kmem_obj_info kp = { };
- if (WARN_ON_ONCE(!virt_addr_valid(object)))
- return;
+ /* Some arches consider ZERO_SIZE_PTR to be a valid address. */
+ if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
+ return false;
slab = virt_to_slab(object);
- if (WARN_ON_ONCE(!slab)) {
- pr_cont(" non-slab memory.\n");
- return;
- }
+ if (!slab)
+ return false;
+
kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
@@ -621,6 +601,7 @@ void kmem_dump_obj(void *object)
pr_info(" %pS\n", kp.kp_free_stack[i]);
}
+ return true;
}
EXPORT_SYMBOL_GPL(kmem_dump_obj);
#endif
@@ -745,24 +726,24 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
size_t kmalloc_size_roundup(size_t size)
{
- struct kmem_cache *c;
+ if (size && size <= KMALLOC_MAX_CACHE_SIZE) {
+ /*
+ * The flags don't matter since size_index is common to all.
+ * Neither does the caller for just getting ->object_size.
+ */
+ return kmalloc_slab(size, GFP_KERNEL, 0)->object_size;
+ }
- /* Short-circuit the 0 size case. */
- if (unlikely(size == 0))
- return 0;
- /* Short-circuit saturated "too-large" case. */
- if (unlikely(size == SIZE_MAX))
- return SIZE_MAX;
/* Above the smaller buckets, size is a multiple of page size. */
- if (size > KMALLOC_MAX_CACHE_SIZE)
+ if (size && size <= KMALLOC_MAX_SIZE)
return PAGE_SIZE << get_order(size);
/*
- * The flags don't matter since size_index is common to all.
- * Neither does the caller for just getting ->object_size.
+ * Return 'size' for 0 - kmalloc() returns ZERO_SIZE_PTR
+ * and very large size - kmalloc() may fail.
*/
- c = kmalloc_slab(size, GFP_KERNEL, 0);
- return c ? c->object_size : 0;
+ return size;
+
}
EXPORT_SYMBOL(kmalloc_size_roundup);
@@ -895,10 +876,13 @@ void __init setup_kmalloc_cache_index_table(void)
static unsigned int __kmalloc_minalign(void)
{
+ unsigned int minalign = dma_get_cache_alignment();
+
if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
is_swiotlb_allocated())
- return ARCH_KMALLOC_MINALIGN;
- return dma_get_cache_alignment();
+ minalign = ARCH_KMALLOC_MINALIGN;
+
+ return max(minalign, arch_slab_minalign());
}
void __init
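
The reworked kmalloc_size_roundup() now answers three cases: sizes that fit a kmalloc cache return that cache's object_size, sizes up to KMALLOC_MAX_SIZE round up to a whole power-of-two number of pages, and 0 or over-large sizes are returned unchanged (kmalloc() would hand back ZERO_SIZE_PTR or fail for those anyway). A userspace approximation of that shape follows; the constants and the power-of-two cache model are purely illustrative, not the kernel's actual size tables.

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define KMALLOC_MAX_CACHE_SIZE	(2 * PAGE_SIZE)		/* illustrative only */
#define KMALLOC_MAX_SIZE	(PAGE_SIZE << 10)	/* illustrative only */

/* Stand-in for kmalloc_slab()->object_size: next power of two >= 8. */
static size_t cache_object_size(size_t size)
{
	size_t n = 8;

	while (n < size)
		n <<= 1;
	return n;
}

static size_t kmalloc_size_roundup(size_t size)
{
	if (size && size <= KMALLOC_MAX_CACHE_SIZE)
		return cache_object_size(size);

	if (size && size <= KMALLOC_MAX_SIZE) {
		/* whole pages, rounded up to a power-of-two page count */
		size_t pages = 1;

		while (pages * PAGE_SIZE < size)
			pages <<= 1;
		return pages * PAGE_SIZE;
	}

	/* 0 and too-large sizes pass through unchanged */
	return size;
}

int main(void)
{
	printf("%zu\n", kmalloc_size_roundup(100));	/* 128 */
	printf("%zu\n", kmalloc_size_roundup(5000));	/* 8192 */
	printf("%zu\n", kmalloc_size_roundup(9000));	/* 16384 */
	printf("%zu\n", kmalloc_size_roundup(0));	/* 0 */
	return 0;
}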
diff --git a/mm/slub.c b/mm/slub.c
index f7940048138c..63d281dfacdb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4110,17 +4110,12 @@ static unsigned int slub_min_objects;
* the smallest order which will fit the object.
*/
static inline unsigned int calc_slab_order(unsigned int size,
- unsigned int min_objects, unsigned int max_order,
+ unsigned int min_order, unsigned int max_order,
unsigned int fract_leftover)
{
- unsigned int min_order = slub_min_order;
unsigned int order;
- if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
- return get_order(size * MAX_OBJS_PER_PAGE) - 1;
-
- for (order = max(min_order, (unsigned int)get_order(min_objects * size));
- order <= max_order; order++) {
+ for (order = min_order; order <= max_order; order++) {
unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
unsigned int rem;
@@ -4139,16 +4134,8 @@ static inline int calculate_order(unsigned int size)
unsigned int order;
unsigned int min_objects;
unsigned int max_objects;
- unsigned int nr_cpus;
+ unsigned int min_order;
- /*
- * Attempt to find best configuration for a slab. This
- * works by first attempting to generate a layout with
- * the best configuration and backing off gradually.
- *
- * First we increase the acceptable waste in a slab. Then
- * we reduce the minimum objects required in a slab.
- */
min_objects = slub_min_objects;
if (!min_objects) {
/*
@@ -4160,40 +4147,46 @@ static inline int calculate_order(unsigned int size)
* order on systems that appear larger than they are, and too
* low order on systems that appear smaller than they are.
*/
- nr_cpus = num_present_cpus();
+ unsigned int nr_cpus = num_present_cpus();
if (nr_cpus <= 1)
nr_cpus = nr_cpu_ids;
min_objects = 4 * (fls(nr_cpus) + 1);
}
- max_objects = order_objects(slub_max_order, size);
+ /* min_objects can't be 0 because get_order(0) is undefined */
+ max_objects = max(order_objects(slub_max_order, size), 1U);
min_objects = min(min_objects, max_objects);
- while (min_objects > 1) {
- unsigned int fraction;
-
- fraction = 16;
- while (fraction >= 4) {
- order = calc_slab_order(size, min_objects,
- slub_max_order, fraction);
- if (order <= slub_max_order)
- return order;
- fraction /= 2;
- }
- min_objects--;
- }
+ min_order = max_t(unsigned int, slub_min_order,
+ get_order(min_objects * size));
+ if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
+ return get_order(size * MAX_OBJS_PER_PAGE) - 1;
/*
- * We were unable to place multiple objects in a slab. Now
- * lets see if we can place a single object there.
+ * Attempt to find best configuration for a slab. This works by first
+ * attempting to generate a layout with the best possible configuration
+ * and backing off gradually.
+ *
+ * We start with accepting at most 1/16 waste and try to find the
+ * smallest order from min_objects-derived/slub_min_order up to
+ * slub_max_order that will satisfy the constraint. Note that increasing
+ * the order can only result in same or less fractional waste, not more.
+ *
+ * If that fails, we increase the acceptable fraction of waste and try
+ * again. The last iteration with fraction of 1/2 would effectively
+ * accept any waste and give us the order determined by min_objects, as
+ * long as at least single object fits within slub_max_order.
*/
- order = calc_slab_order(size, 1, slub_max_order, 1);
- if (order <= slub_max_order)
- return order;
+ for (unsigned int fraction = 16; fraction > 1; fraction /= 2) {
+ order = calc_slab_order(size, min_order, slub_max_order,
+ fraction);
+ if (order <= slub_max_order)
+ return order;
+ }
/*
* Doh this slab cannot be placed using slub_max_order.
*/
- order = calc_slab_order(size, 1, MAX_ORDER, 1);
+ order = get_order(size);
if (order <= MAX_ORDER)
return order;
return -ENOSYS;
@@ -4711,6 +4704,9 @@ static int __init setup_slub_min_order(char *str)
{
get_option(&str, (int *)&slub_min_order);
+ if (slub_min_order > slub_max_order)
+ slub_max_order = slub_min_order;
+
return 1;
}
@@ -4721,6 +4717,9 @@ static int __init setup_slub_max_order(char *str)
get_option(&str, (int *)&slub_max_order);
slub_max_order = min_t(unsigned int, slub_max_order, MAX_ORDER);
+ if (slub_min_order > slub_max_order)
+ slub_min_order = slub_max_order;
+
return 1;
}
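
The slub.c rewrite keeps the same search strategy but restructures it: compute a fixed min_order from min_objects once, then try waste fractions 1/16, 1/8, 1/4, 1/2 and take the first order within [min_order, slub_max_order] whose leftover is small enough. A small standalone model of that waste test follows; the object size and order limits are illustrative, not the kernel's.

#include <stdio.h>

#define PAGE_SHIFT	12

static unsigned int calc_slab_order(unsigned int size, unsigned int min_order,
				    unsigned int max_order, unsigned int fract_leftover)
{
	for (unsigned int order = min_order; order <= max_order; order++) {
		unsigned int slab_size = 1U << (PAGE_SHIFT + order);
		unsigned int rem = slab_size % size;

		/* acceptable if wasted space is at most slab_size / fract_leftover */
		if (rem <= slab_size / fract_leftover)
			return order;
	}
	return max_order + 1;
}

int main(void)
{
	unsigned int size = 720, min_order = 0, max_order = 3;

	/* Relax the waste limit step by step, as calculate_order() now does. */
	for (unsigned int fraction = 16; fraction > 1; fraction /= 2) {
		unsigned int order = calc_slab_order(size, min_order, max_order, fraction);

		if (order <= max_order) {
			printf("order %u at waste <= 1/%u\n", order, fraction);
			return 0;
		}
	}
	printf("no fit below order %u\n", max_order);
	return 0;
}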
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e52f486834eb..4bc70f459164 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2530,11 +2530,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
exit_swap_address_space(p->type);
inode = mapping->host;
- if (S_ISBLK(inode->i_mode)) {
- struct block_device *bdev = I_BDEV(inode);
-
- set_blocksize(bdev, old_block_size);
- blkdev_put(bdev, p);
+ if (p->bdev_handle) {
+ set_blocksize(p->bdev, old_block_size);
+ bdev_release(p->bdev_handle);
+ p->bdev_handle = NULL;
}
inode_lock(inode);
@@ -2764,13 +2763,14 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
int error;
if (S_ISBLK(inode->i_mode)) {
- p->bdev = blkdev_get_by_dev(inode->i_rdev,
+ p->bdev_handle = bdev_open_by_dev(inode->i_rdev,
BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
- if (IS_ERR(p->bdev)) {
- error = PTR_ERR(p->bdev);
- p->bdev = NULL;
+ if (IS_ERR(p->bdev_handle)) {
+ error = PTR_ERR(p->bdev_handle);
+ p->bdev_handle = NULL;
return error;
}
+ p->bdev = p->bdev_handle->bdev;
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
@@ -3206,9 +3206,10 @@ bad_swap:
p->percpu_cluster = NULL;
free_percpu(p->cluster_next_cpu);
p->cluster_next_cpu = NULL;
- if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
+ if (p->bdev_handle) {
set_blocksize(p->bdev, p->old_block_size);
- blkdev_put(p->bdev, p);
+ bdev_release(p->bdev_handle);
+ p->bdev_handle = NULL;
}
inode = NULL;
destroy_swap_extents(p);
diff --git a/mm/util.c b/mm/util.c
index 8cbbfd3a3d59..6eddd891198e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1060,10 +1060,8 @@ void mem_dump_obj(void *object)
{
const char *type;
- if (kmem_valid_obj(object)) {
- kmem_dump_obj(object);
+ if (kmem_dump_obj(object))
return;
- }
if (vmalloc_dump_obj(object))
return;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ef8599d394fd..a3fedb3ee0db 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -111,7 +111,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t entry = pfn_pte(pfn, prot);
entry = arch_make_huge_pte(entry, ilog2(size), 0);
- set_huge_pte_at(&init_mm, addr, pte, entry);
+ set_huge_pte_at(&init_mm, addr, pte, entry, size);
pfn += PFN_DOWN(size);
continue;
}
diff --git a/mm/zswap.c b/mm/zswap.c
index 412b1409a0d7..37d2b1cb2ecb 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1219,6 +1219,19 @@ bool zswap_store(struct folio *folio)
return false;
/*
+ * If this is a duplicate, it must be removed before attempting to store
+ * it, otherwise, if the store fails the old page won't be removed from
+ * the tree, and it might be written back overriding the new data.
+ */
+ spin_lock(&tree->lock);
+ dupentry = zswap_rb_search(&tree->rbroot, offset);
+ if (dupentry) {
+ zswap_duplicate_entry++;
+ zswap_invalidate_entry(tree, dupentry);
+ }
+ spin_unlock(&tree->lock);
+
+ /*
* XXX: zswap reclaim does not work with cgroups yet. Without a
* cgroup-aware entry LRU, we will push out entries system-wide based on
* local cgroup limits.
@@ -1333,7 +1346,14 @@ insert_entry:
/* map */
spin_lock(&tree->lock);
+ /*
+ * A duplicate entry should have been removed at the beginning of this
+ * function. Since the swap entry should be pinned, if a duplicate is
+ * found again here it means that something went wrong in the swap
+ * cache.
+ */
while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
+ WARN_ON(1);
zswap_duplicate_entry++;
zswap_invalidate_entry(tree, dupentry);
}
@@ -1363,8 +1383,8 @@ reject:
shrink:
pool = zswap_pool_last_get();
- if (pool)
- queue_work(shrink_wq, &pool->shrink_work);
+ if (pool && !queue_work(shrink_wq, &pool->shrink_work))
+ zswap_pool_put(pool);
goto reject;
}
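
Two zswap fixes close out the diff: a duplicate entry is now invalidated before the store attempt, so a failed store cannot leave stale data behind to be written back, and the shrink path only keeps the pool reference when queue_work() actually queued the work. The sketch below models only the latter reference-handoff pattern with made-up toy types; it is not zswap code.

#include <stdbool.h>
#include <stdio.h>

/* Toy refcounted pool and a queue that rejects double-queueing. */
struct pool { int refs; bool queued; };

static void pool_get(struct pool *p) { p->refs++; }
static void pool_put(struct pool *p) { p->refs--; }

static bool queue_shrink_work(struct pool *p)
{
	if (p->queued)
		return false;	/* already pending: the work keeps its existing ref */
	p->queued = true;
	return true;
}

static void shrink(struct pool *p)
{
	pool_get(p);		/* reference handed to the work item */
	if (!queue_shrink_work(p))
		pool_put(p);	/* work not queued: drop the ref ourselves */
}

int main(void)
{
	struct pool p = { .refs = 1, .queued = false };

	shrink(&p);	/* queued: ref now held by the pending work */
	shrink(&p);	/* rejected: ref dropped immediately, no leak */
	printf("refs = %d\n", p.refs);	/* 2: base ref + one for the queued work */
	return 0;
}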