summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/execmem.c39
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/hugetlb.c6
-rw-r--r--mm/kasan/report.c34
-rw-r--r--mm/madvise.c11
-rw-r--r--mm/memcontrol.c1
-rw-r--r--mm/memory.c17
-rw-r--r--mm/migrate_device.c13
-rw-r--r--mm/truncate.c2
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/zswap.c35
11 files changed, 110 insertions, 52 deletions
diff --git a/mm/execmem.c b/mm/execmem.c
index 317b6a8d35be..e6c4f5076ca8 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -257,7 +257,6 @@ out_unlock:
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
- unsigned long start, end;
struct vm_struct *vm;
size_t alloc_size;
int err = -ENOMEM;
@@ -275,26 +274,18 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
/* fill memory with instructions that will trap */
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
- start = (unsigned long)p;
- end = start + alloc_size;
-
- vunmap_range(start, end);
-
- err = execmem_set_direct_map_valid(vm, false);
- if (err)
- goto err_free_mem;
-
- err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages,
- PMD_SHIFT);
+ err = set_memory_rox((unsigned long)p, vm->nr_pages);
if (err)
goto err_free_mem;
err = execmem_cache_add(p, alloc_size);
if (err)
- goto err_free_mem;
+ goto err_reset_direct_map;
return 0;
+err_reset_direct_map:
+ execmem_set_direct_map_valid(vm, true);
err_free_mem:
vfree(p);
return err;
@@ -344,6 +335,28 @@ static bool execmem_cache_free(void *ptr)
return true;
}
+
+int execmem_make_temp_rw(void *ptr, size_t size)
+{
+ unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long addr = (unsigned long)ptr;
+ int ret;
+
+ ret = set_memory_nx(addr, nr);
+ if (ret)
+ return ret;
+
+ return set_memory_rw(addr, nr);
+}
+
+int execmem_restore_rox(void *ptr, size_t size)
+{
+ unsigned int nr = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long addr = (unsigned long)ptr;
+
+ return set_memory_rox(addr, nr);
+}
+
#else /* CONFIG_ARCH_HAS_EXECMEM_ROX */
static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
{
diff --git a/mm/filemap.c b/mm/filemap.c
index 804d7365680c..d4564a79eb35 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -445,7 +445,7 @@ EXPORT_SYMBOL(filemap_fdatawrite_range);
* filemap_fdatawrite_range_kick - start writeback on a range
* @mapping: target address_space
* @start: index to start writeback on
- * @end: last (non-inclusive) index for writeback
+ * @end: last (inclusive) index for writeback
*
* This is a non-integrity writeback helper, to start writing back folios
* for the indicated range.
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 65068671e460..09d6673040ed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3145,7 +3145,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
/* do node specific alloc */
if (nid != NUMA_NO_NODE) {
- m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h),
+ m = memblock_alloc_exact_nid_raw(huge_page_size(h), huge_page_size(h),
0, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
if (!m)
return 0;
@@ -5447,7 +5447,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
if (src_ptl != dst_ptl)
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
- pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
+ pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
huge_pte_clear(mm, new_addr, dst_pte, sz);
@@ -5622,7 +5622,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
}
- pte = huge_ptep_get_and_clear(mm, address, ptep);
+ pte = huge_ptep_get_and_clear(mm, address, ptep, sz);
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
if (huge_pte_dirty(pte))
set_page_dirty(page);
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 3fe77a360f1c..8357e1a33699 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -370,6 +370,36 @@ static inline bool init_task_stack_addr(const void *addr)
sizeof(init_thread_union.stack));
}
+/*
+ * This function is invoked with report_lock (a raw_spinlock) held. A
+ * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping
+ * rt_spinlock.
+ *
+ * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a
+ * lockdep warning for this raw_spinlock -> spinlock dependency. This config
+ * option is enabled by default to ensure better test coverage to expose this
+ * kind of RT kernel problem. This lockdep splat, however, can be suppressed
+ * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the
+ * invalid PREEMPT_RT case has been taken care of.
+ */
+static inline struct vm_struct *kasan_find_vm_area(void *addr)
+{
+ static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP);
+ struct vm_struct *va;
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return NULL;
+
+ /*
+ * Suppress lockdep warning and fetch vmalloc area of the
+ * offending address.
+ */
+ lock_map_acquire_try(&vmalloc_map);
+ va = find_vm_area(addr);
+ lock_map_release(&vmalloc_map);
+ return va;
+}
+
static void print_address_description(void *addr, u8 tag,
struct kasan_report_info *info)
{
@@ -399,7 +429,7 @@ static void print_address_description(void *addr, u8 tag,
}
if (is_vmalloc_addr(addr)) {
- struct vm_struct *va = find_vm_area(addr);
+ struct vm_struct *va = kasan_find_vm_area(addr);
if (va) {
pr_err("The buggy address belongs to the virtual mapping at\n"
@@ -409,6 +439,8 @@ static void print_address_description(void *addr, u8 tag,
pr_err("\n");
page = vmalloc_to_page(addr);
+ } else {
+ pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr);
}
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 49f3a75046f6..08b207f8e61e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -933,7 +933,16 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
*/
end = vma->vm_end;
}
- VM_WARN_ON(start >= end);
+ /*
+ * If the memory region between start and end was
+ * originally backed by 4kB pages and then remapped to
+ * be backed by hugepages while mmap_lock was dropped,
+ * the adjustment for hugetlb vma above may have rounded
+ * end down to the start address.
+ */
+ if (start == end)
+ return 0;
+ VM_WARN_ON(start > end);
}
if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 46f8b372d212..4de6acb9b8ec 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4166,6 +4166,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
memcg_memory_event(memcg, MEMCG_OOM);
if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
break;
+ cond_resched();
}
memcg_wb_domain_size_changed(memcg);
diff --git a/mm/memory.c b/mm/memory.c
index 539c0f7c6d54..b4d3d4893267 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1719,7 +1719,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pmd_t pmdval;
unsigned long start = addr;
bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details);
- bool direct_reclaim = false;
+ bool direct_reclaim = true;
int nr;
retry:
@@ -1734,8 +1734,10 @@ retry:
do {
bool any_skipped = false;
- if (need_resched())
+ if (need_resched()) {
+ direct_reclaim = false;
break;
+ }
nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss,
&force_flush, &force_break, &any_skipped);
@@ -1743,11 +1745,20 @@ retry:
can_reclaim_pt = false;
if (unlikely(force_break)) {
addr += nr * PAGE_SIZE;
+ direct_reclaim = false;
break;
}
} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
- if (can_reclaim_pt && addr == end)
+ /*
+ * Fast path: try to hold the pmd lock and unmap the PTE page.
+ *
+ * If the pte lock was released midway (retry case), or if the attempt
+ * to hold the pmd lock failed, then we need to recheck all pte entries
+ * to ensure they are still none, thereby preventing the pte entries
+ * from being repopulated by another thread.
+ */
+ if (can_reclaim_pt && direct_reclaim && addr == end)
direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
add_mm_rss_vec(mm, rss);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 9cf26592ac93..5bd888223cc8 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -840,20 +840,15 @@ void migrate_device_finalize(unsigned long *src_pfns,
dst = src;
}
+ if (!folio_is_zone_device(dst))
+ folio_add_lru(dst);
remove_migration_ptes(src, dst, 0);
folio_unlock(src);
-
- if (folio_is_zone_device(src))
- folio_put(src);
- else
- folio_putback_lru(src);
+ folio_put(src);
if (dst != src) {
folio_unlock(dst);
- if (folio_is_zone_device(dst))
- folio_put(dst);
- else
- folio_putback_lru(dst);
+ folio_put(dst);
}
}
}
diff --git a/mm/truncate.c b/mm/truncate.c
index e2e115adfbc5..76d8fcd89bd0 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -548,8 +548,6 @@ int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio,
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- if (folio_test_dirty(folio))
- return 0;
if (folio_mapped(folio))
unmap_mapping_folio(folio);
BUG_ON(folio_mapped(folio));
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 16bfe1c694dd..88998725f1c5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1435,6 +1435,8 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_X86
"direct_map_level2_splits",
"direct_map_level3_splits",
+ "direct_map_level2_collapses",
+ "direct_map_level3_collapses",
#endif
#ifdef CONFIG_PER_VMA_LOCK_STATS
"vma_lock_success",
diff --git a/mm/zswap.c b/mm/zswap.c
index 6504174fbc6a..ac9d299e7d0c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1445,9 +1445,9 @@ resched:
* main API
**********************************/
-static ssize_t zswap_store_page(struct page *page,
- struct obj_cgroup *objcg,
- struct zswap_pool *pool)
+static bool zswap_store_page(struct page *page,
+ struct obj_cgroup *objcg,
+ struct zswap_pool *pool)
{
swp_entry_t page_swpentry = page_swap_entry(page);
struct zswap_entry *entry, *old;
@@ -1456,7 +1456,7 @@ static ssize_t zswap_store_page(struct page *page,
entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
if (!entry) {
zswap_reject_kmemcache_fail++;
- return -EINVAL;
+ return false;
}
if (!zswap_compress(page, entry, pool))
@@ -1483,13 +1483,17 @@ static ssize_t zswap_store_page(struct page *page,
/*
* The entry is successfully compressed and stored in the tree, there is
- * no further possibility of failure. Grab refs to the pool and objcg.
- * These refs will be dropped by zswap_entry_free() when the entry is
- * removed from the tree.
+ * no further possibility of failure. Grab refs to the pool and objcg,
+ * charge zswap memory, and increment zswap_stored_pages.
+ * The opposite actions will be performed by zswap_entry_free()
+ * when the entry is removed from the tree.
*/
zswap_pool_get(pool);
- if (objcg)
+ if (objcg) {
obj_cgroup_get(objcg);
+ obj_cgroup_charge_zswap(objcg, entry->length);
+ }
+ atomic_long_inc(&zswap_stored_pages);
/*
* We finish initializing the entry while it's already in xarray.
@@ -1510,13 +1514,13 @@ static ssize_t zswap_store_page(struct page *page,
zswap_lru_add(&zswap_list_lru, entry);
}
- return entry->length;
+ return true;
store_failed:
zpool_free(pool->zpool, entry->handle);
compress_failed:
zswap_entry_cache_free(entry);
- return -EINVAL;
+ return false;
}
bool zswap_store(struct folio *folio)
@@ -1526,7 +1530,6 @@ bool zswap_store(struct folio *folio)
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg = NULL;
struct zswap_pool *pool;
- size_t compressed_bytes = 0;
bool ret = false;
long index;
@@ -1564,20 +1567,14 @@ bool zswap_store(struct folio *folio)
for (index = 0; index < nr_pages; ++index) {
struct page *page = folio_page(folio, index);
- ssize_t bytes;
- bytes = zswap_store_page(page, objcg, pool);
- if (bytes < 0)
+ if (!zswap_store_page(page, objcg, pool))
goto put_pool;
- compressed_bytes += bytes;
}
- if (objcg) {
- obj_cgroup_charge_zswap(objcg, compressed_bytes);
+ if (objcg)
count_objcg_events(objcg, ZSWPOUT, nr_pages);
- }
- atomic_long_add(nr_pages, &zswap_stored_pages);
count_vm_events(ZSWPOUT, nr_pages);
ret = true;