summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-09 11:18:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-09 11:18:47 -0800
commitfb46e22a9e3863e08aef8815df9f17d0f4b9aede (patch)
tree83e052911fa8d8d90bcf9de2796e17e19040613f /tools
parentd30e51aa7b1f6fa7dd78d4598d1e4c047fcc3fb9 (diff)
parent5e0a760b44417f7cadd79de2204d6247109558a0 (diff)
Merge tag 'mm-stable-2024-01-08-15-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Many singleton patches against the MM code. The patch series which are included in this merge do the following: - Peng Zhang has done some mapletree maintainance work in the series 'maple_tree: add mt_free_one() and mt_attr() helpers' 'Some cleanups of maple tree' - In the series 'mm: use memmap_on_memory semantics for dax/kmem' Vishal Verma has altered the interworking between memory-hotplug and dax/kmem so that newly added 'device memory' can more easily have its memmap placed within that newly added memory. - Matthew Wilcox continues folio-related work (including a few fixes) in the patch series 'Add folio_zero_tail() and folio_fill_tail()' 'Make folio_start_writeback return void' 'Fix fault handler's handling of poisoned tail pages' 'Convert aops->error_remove_page to ->error_remove_folio' 'Finish two folio conversions' 'More swap folio conversions' - Kefeng Wang has also contributed folio-related work in the series 'mm: cleanup and use more folio in page fault' - Jim Cromie has improved the kmemleak reporting output in the series 'tweak kmemleak report format'. - In the series 'stackdepot: allow evicting stack traces' Andrey Konovalov to permits clients (in this case KASAN) to cause eviction of no longer needed stack traces. - Charan Teja Kalla has fixed some accounting issues in the page allocator's atomic reserve calculations in the series 'mm: page_alloc: fixes for high atomic reserve caluculations'. - Dmitry Rokosov has added to the samples/ dorectory some sample code for a userspace memcg event listener application. See the series 'samples: introduce cgroup events listeners'. - Some mapletree maintanance work from Liam Howlett in the series 'maple_tree: iterator state changes'. - Nhat Pham has improved zswap's approach to writeback in the series 'workload-specific and memory pressure-driven zswap writeback'. - DAMON/DAMOS feature and maintenance work from SeongJae Park in the series 'mm/damon: let users feed and tame/auto-tune DAMOS' 'selftests/damon: add Python-written DAMON functionality tests' 'mm/damon: misc updates for 6.8' - Yosry Ahmed has improved memcg's stats flushing in the series 'mm: memcg: subtree stats flushing and thresholds'. - In the series 'Multi-size THP for anonymous memory' Ryan Roberts has added a runtime opt-in feature to transparent hugepages which improves performance by allocating larger chunks of memory during anonymous page faults. - Matthew Wilcox has also contributed some cleanup and maintenance work against eh buffer_head code int he series 'More buffer_head cleanups'. - Suren Baghdasaryan has done work on Andrea Arcangeli's series 'userfaultfd move option'. UFFDIO_MOVE permits userspace heap compaction algorithms to move userspace's pages around rather than UFFDIO_COPY'a alloc/copy/free. - Stefan Roesch has developed a 'KSM Advisor', in the series 'mm/ksm: Add ksm advisor'. This is a governor which tunes KSM's scanning aggressiveness in response to userspace's current needs. - Chengming Zhou has optimized zswap's temporary working memory use in the series 'mm/zswap: dstmem reuse optimizations and cleanups'. - Matthew Wilcox has performed some maintenance work on the writeback code, both code and within filesystems. The series is 'Clean up the writeback paths'. - Andrey Konovalov has optimized KASAN's handling of alloc and free stack traces for secondary-level allocators, in the series 'kasan: save mempool stack traces'. - Andrey also performed some KASAN maintenance work in the series 'kasan: assorted clean-ups'. - David Hildenbrand has gone to town on the rmap code. Cleanups, more pte batching, folio conversions and more. See the series 'mm/rmap: interface overhaul'. - Kinsey Ho has contributed some maintenance work on the MGLRU code in the series 'mm/mglru: Kconfig cleanup'. - Matthew Wilcox has contributed lruvec page accounting code cleanups in the series 'Remove some lruvec page accounting functions'" * tag 'mm-stable-2024-01-08-15-31' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (361 commits) mm, treewide: rename MAX_ORDER to MAX_PAGE_ORDER mm, treewide: introduce NR_PAGE_ORDERS selftests/mm: add separate UFFDIO_MOVE test for PMD splitting selftests/mm: skip test if application doesn't has root privileges selftests/mm: conform test to TAP format output selftests: mm: hugepage-mmap: conform to TAP format output selftests/mm: gup_test: conform test to TAP format output mm/selftests: hugepage-mremap: conform test to TAP format output mm/vmstat: move pgdemote_* out of CONFIG_NUMA_BALANCING mm: zsmalloc: return -ENOSPC rather than -EINVAL in zs_malloc while size is too large mm/memcontrol: remove __mod_lruvec_page_state() mm/khugepaged: use a folio more in collapse_file() slub: use a folio in __kmalloc_large_node slub: use folio APIs in free_large_kmalloc() slub: use alloc_pages_node() in alloc_slab_page() mm: remove inc/dec lruvec page state functions mm: ratelimit stat flush from workingset shrinker kasan: stop leaking stack trace handles mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE mm/mglru: add dummy pmd_dirty() ...
Diffstat (limited to 'tools')
-rw-r--r--tools/cgroup/Makefile11
-rw-r--r--tools/cgroup/cgroup_event_listener.c83
-rw-r--r--tools/include/linux/rwsem.h4
-rw-r--r--tools/include/linux/spinlock.h1
-rw-r--r--tools/include/uapi/linux/fs.h1
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt2
-rw-r--r--tools/testing/memblock/linux/mmzone.h6
-rw-r--r--tools/testing/radix-tree/linux.c45
-rw-r--r--tools/testing/radix-tree/linux/maple_tree.h2
-rw-r--r--tools/testing/radix-tree/maple.c396
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c74
-rw-r--r--tools/testing/selftests/damon/Makefile3
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py322
-rw-r--r--tools/testing/selftests/damon/access_memory.c41
-rwxr-xr-xtools/testing/selftests/damon/sysfs.sh27
-rw-r--r--tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py33
-rw-r--r--tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py55
-rw-r--r--tools/testing/selftests/mm/Makefile4
-rw-r--r--tools/testing/selftests/mm/compaction_test.c91
-rw-r--r--tools/testing/selftests/mm/cow.c183
-rw-r--r--tools/testing/selftests/mm/gup_test.c65
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c23
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c87
-rw-r--r--tools/testing/selftests/mm/khugepaged.c410
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh55
-rw-r--r--tools/testing/selftests/mm/thp_settings.c349
-rw-r--r--tools/testing/selftests/mm/thp_settings.h80
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c3
-rw-r--r--tools/testing/selftests/mm/uffd-common.c39
-rw-r--r--tools/testing/selftests/mm/uffd-common.h9
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c5
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c209
-rw-r--r--tools/testing/selftests/mm/vm_util.c80
33 files changed, 2091 insertions, 707 deletions
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
deleted file mode 100644
index ffca068e4a76..000000000000
--- a/tools/cgroup/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for cgroup tools
-
-CFLAGS = -Wall -Wextra
-
-all: cgroup_event_listener
-%: %.c
- $(CC) $(CFLAGS) -o $@ $^
-
-clean:
- $(RM) cgroup_event_listener
diff --git a/tools/cgroup/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c
deleted file mode 100644
index 3d70dc831a76..000000000000
--- a/tools/cgroup/cgroup_event_listener.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * cgroup_event_listener.c - Simple listener of cgroup events
- *
- * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
- */
-
-#include <assert.h>
-#include <err.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libgen.h>
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/eventfd.h>
-
-#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>"
-
-int main(int argc, char **argv)
-{
- int efd = -1;
- int cfd = -1;
- int event_control = -1;
- char event_control_path[PATH_MAX];
- char line[LINE_MAX];
- int ret;
-
- if (argc != 3)
- errx(1, "%s", USAGE_STR);
-
- cfd = open(argv[1], O_RDONLY);
- if (cfd == -1)
- err(1, "Cannot open %s", argv[1]);
-
- ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
- dirname(argv[1]));
- if (ret >= PATH_MAX)
- errx(1, "Path to cgroup.event_control is too long");
-
- event_control = open(event_control_path, O_WRONLY);
- if (event_control == -1)
- err(1, "Cannot open %s", event_control_path);
-
- efd = eventfd(0, 0);
- if (efd == -1)
- err(1, "eventfd() failed");
-
- ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
- if (ret >= LINE_MAX)
- errx(1, "Arguments string is too long");
-
- ret = write(event_control, line, strlen(line) + 1);
- if (ret == -1)
- err(1, "Cannot write to cgroup.event_control");
-
- while (1) {
- uint64_t result;
-
- ret = read(efd, &result, sizeof(result));
- if (ret == -1) {
- if (errno == EINTR)
- continue;
- err(1, "Cannot read from eventfd");
- }
- assert(ret == sizeof(result));
-
- ret = access(event_control_path, W_OK);
- if ((ret == -1) && (errno == ENOENT)) {
- puts("The cgroup seems to have removed.");
- break;
- }
-
- if (ret == -1)
- err(1, "cgroup.event_control is not accessible any more");
-
- printf("%s %s: crossed\n", argv[1], argv[2]);
- }
-
- return 0;
-}
diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h
index 83971b3cbfce..f8bffd4a987c 100644
--- a/tools/include/linux/rwsem.h
+++ b/tools/include/linux/rwsem.h
@@ -37,4 +37,8 @@ static inline int up_write(struct rw_semaphore *sem)
{
return pthread_rwlock_unlock(&sem->lock);
}
+
+#define down_read_nested(sem, subclass) down_read(sem)
+#define down_write_nested(sem, subclass) down_write(sem)
+
#endif /* _TOOLS_RWSEM_H */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 622266b197d0..a6cdf25b6b9d 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -11,6 +11,7 @@
#define spin_lock_init(x) pthread_mutex_init(x, NULL)
#define spin_lock(x) pthread_mutex_lock(x)
+#define spin_lock_nested(x, subclass) pthread_mutex_lock(x)
#define spin_unlock(x) pthread_mutex_unlock(x)
#define spin_lock_bh(x) pthread_mutex_lock(x)
#define spin_unlock_bh(x) pthread_mutex_unlock(x)
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index da43810b7485..48ad69f7722e 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_SWAPPED (1 << 4)
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
+#define PAGE_IS_SOFT_DIRTY (1 << 7)
/*
* struct page_region - Page region with flags
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 4c90cc176f81..2109690b0d5f 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -683,7 +683,7 @@ Buffer handling
~~~~~~~~~~~~~~~
There may be buffer limitations (i.e. single ToPa entry) which means that actual
-buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_PAGE_ORDER). In order to
provide other sizes, and in particular an arbitrarily large size, multiple
buffers are logically concatenated. However an interrupt must be used to switch
between buffers. That has two potential problems:
diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h
index 134f8eab0768..71546e15bdd3 100644
--- a/tools/testing/memblock/linux/mmzone.h
+++ b/tools/testing/memblock/linux/mmzone.h
@@ -17,10 +17,10 @@ enum zone_type {
};
#define MAX_NR_ZONES __MAX_NR_ZONES
-#define MAX_ORDER 10
-#define MAX_ORDER_NR_PAGES (1 << MAX_ORDER)
+#define MAX_PAGE_ORDER 10
+#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER)
-#define pageblock_order MAX_ORDER
+#define pageblock_order MAX_PAGE_ORDER
#define pageblock_nr_pages BIT(pageblock_order)
#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages)
#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages)
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 61fe2601cb3a..4eb442206d01 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -93,13 +93,9 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
return p;
}
-void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp)
+void __kmem_cache_free_locked(struct kmem_cache *cachep, void *objp)
{
assert(objp);
- uatomic_dec(&nr_allocated);
- uatomic_dec(&cachep->nr_allocated);
- if (kmalloc_verbose)
- printf("Freeing %p to slab\n", objp);
if (cachep->nr_objs > 10 || cachep->align) {
memset(objp, POISON_FREE, cachep->size);
free(objp);
@@ -111,6 +107,15 @@ void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp)
}
}
+void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp)
+{
+ uatomic_dec(&nr_allocated);
+ uatomic_dec(&cachep->nr_allocated);
+ if (kmalloc_verbose)
+ printf("Freeing %p to slab\n", objp);
+ __kmem_cache_free_locked(cachep, objp);
+}
+
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
pthread_mutex_lock(&cachep->lock);
@@ -141,18 +146,17 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
if (kmalloc_verbose)
pr_debug("Bulk alloc %lu\n", size);
- if (!(gfp & __GFP_DIRECT_RECLAIM)) {
- if (cachep->non_kernel < size)
- return 0;
-
- cachep->non_kernel -= size;
- }
-
pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs >= size) {
struct radix_tree_node *node;
for (i = 0; i < size; i++) {
+ if (!(gfp & __GFP_DIRECT_RECLAIM)) {
+ if (!cachep->non_kernel)
+ break;
+ cachep->non_kernel--;
+ }
+
node = cachep->objs;
cachep->nr_objs--;
cachep->objs = node->parent;
@@ -163,11 +167,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
} else {
pthread_mutex_unlock(&cachep->lock);
for (i = 0; i < size; i++) {
+ if (!(gfp & __GFP_DIRECT_RECLAIM)) {
+ if (!cachep->non_kernel)
+ break;
+ cachep->non_kernel--;
+ }
+
if (cachep->align) {
posix_memalign(&p[i], cachep->align,
cachep->size);
} else {
p[i] = malloc(cachep->size);
+ if (!p[i])
+ break;
}
if (cachep->ctor)
cachep->ctor(p[i]);
@@ -176,6 +188,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
}
}
+ if (i < size) {
+ size = i;
+ pthread_mutex_lock(&cachep->lock);
+ for (i = 0; i < size; i++)
+ __kmem_cache_free_locked(cachep, p[i]);
+ pthread_mutex_unlock(&cachep->lock);
+ return 0;
+ }
+
for (i = 0; i < size; i++) {
uatomic_inc(&nr_allocated);
uatomic_inc(&cachep->nr_allocated);
diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h
index 7d8d1f445b89..06c89bdcc515 100644
--- a/tools/testing/radix-tree/linux/maple_tree.h
+++ b/tools/testing/radix-tree/linux/maple_tree.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0+ */
#define atomic_t int32_t
-#include "../../../../include/linux/maple_tree.h"
#define atomic_inc(x) uatomic_inc(x)
#define atomic_read(x) uatomic_read(x)
#define atomic_set(x, y) do {} while (0)
#define U8_MAX UCHAR_MAX
+#include "../../../../include/linux/maple_tree.h"
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 76a8990bb14e..f1caf4bcf937 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -118,6 +118,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas.alloc == NULL);
MT_BUG_ON(mt, mas.alloc->slot[0] == NULL);
mas_push_node(&mas, mn);
+ mas_reset(&mas);
mas_nomem(&mas, GFP_KERNEL); /* free */
mtree_unlock(mt);
@@ -141,7 +142,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
- mas.node = MAS_START;
+ mas.status = ma_start;
mas_nomem(&mas, GFP_KERNEL);
/* Allocate 3 nodes, will fail. */
mas_node_count(&mas, 3);
@@ -158,6 +159,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
/* Ensure we counted 3. */
MT_BUG_ON(mt, mas_allocated(&mas) != 3);
/* Free. */
+ mas_reset(&mas);
mas_nomem(&mas, GFP_KERNEL);
/* Set allocation request to 1. */
@@ -272,6 +274,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
}
+ mas_reset(&mas);
MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL));
}
@@ -294,6 +297,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
smn = smn->slot[0]; /* next. */
}
MT_BUG_ON(mt, mas_allocated(&mas) != total);
+ mas_reset(&mas);
mas_nomem(&mas, GFP_KERNEL); /* Free. */
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@@ -441,7 +445,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, 10); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.node = MAS_START;
+ mas.status = ma_start;
MT_BUG_ON(mt, mas_allocated(&mas) != 10);
mas_destroy(&mas);
@@ -452,7 +456,7 @@ static noinline void __init check_new_node(struct maple_tree *mt)
mas.node = MA_ERROR(-ENOMEM);
mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
- mas.node = MAS_START;
+ mas.status = ma_start;
MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1);
mas_destroy(&mas);
@@ -941,10 +945,11 @@ retry:
ret = mas_descend_walk(mas, range_min, range_max);
if (unlikely(mte_dead_node(mas->node))) {
- mas->node = MAS_START;
+ mas->status = ma_start;
goto retry;
}
+ mas->end = mas_data_end(mas);
return ret;
not_found:
@@ -960,17 +965,19 @@ static inline void *mas_range_load(struct ma_state *mas,
unsigned long index = mas->index;
if (mas_is_none(mas) || mas_is_paused(mas))
- mas->node = MAS_START;
+ mas->status = ma_start;
retry:
if (mas_tree_walk(mas, range_min, range_max))
- if (unlikely(mas->node == MAS_ROOT))
+ if (unlikely(mas->status == ma_root))
return mas_root(mas);
if (likely(mas->offset != MAPLE_NODE_SLOTS))
entry = mas_get_slot(mas, mas->offset);
- if (mas_dead_node(mas, index))
+ if (mas_is_active(mas) && mte_dead_node(mas->node)) {
+ mas_set(mas, index);
goto retry;
+ }
return entry;
}
@@ -34132,7 +34139,7 @@ STORE, 140501948112896, 140501948116991,
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_erase2_testset(mt, set27, ARRAY_SIZE(set27));
rcu_barrier();
- MT_BUG_ON(mt, 0 != mtree_load(mt, 140415537422336));
+ MT_BUG_ON(mt, NULL != mtree_load(mt, 140415537422336));
mt_set_non_kernel(0);
mt_validate(mt);
mtree_destroy(mt);
@@ -34256,7 +34263,7 @@ STORE, 140501948112896, 140501948116991,
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_erase2_testset(mt, set37, ARRAY_SIZE(set37));
rcu_barrier();
- MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712));
+ MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712));
mt_validate(mt);
mtree_destroy(mt);
@@ -34264,7 +34271,7 @@ STORE, 140501948112896, 140501948116991,
mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
check_erase2_testset(mt, set38, ARRAY_SIZE(set38));
rcu_barrier();
- MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712));
+ MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712));
mt_validate(mt);
mtree_destroy(mt);
@@ -35336,7 +35343,7 @@ static void mas_dfs_preorder(struct ma_state *mas)
unsigned char end, slot = 0;
unsigned long *pivots;
- if (mas->node == MAS_START) {
+ if (mas->status == ma_start) {
mas_start(mas);
return;
}
@@ -35373,7 +35380,7 @@ walk_up:
return;
done:
- mas->node = MAS_NONE;
+ mas->status = ma_none;
}
@@ -35832,7 +35839,7 @@ static noinline void __init check_nomem(struct maple_tree *mt)
mas_store(&ms, &ms); /* insert 1 -> &ms, fails. */
MT_BUG_ON(mt, ms.node != MA_ERROR(-ENOMEM));
mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */
- MT_BUG_ON(mt, ms.node != MAS_START);
+ MT_BUG_ON(mt, ms.status != ma_start);
mtree_unlock(mt);
MT_BUG_ON(mt, mtree_insert(mt, 2, mt, GFP_KERNEL) != 0);
mtree_lock(mt);
@@ -35857,6 +35864,363 @@ static noinline void __init check_locky(struct maple_tree *mt)
mt_clear_in_rcu(mt);
}
+/*
+ * Compares two nodes except for the addresses stored in the nodes.
+ * Returns zero if they are the same, otherwise returns non-zero.
+ */
+static int __init compare_node(struct maple_enode *enode_a,
+ struct maple_enode *enode_b)
+{
+ struct maple_node *node_a, *node_b;
+ struct maple_node a, b;
+ void **slots_a, **slots_b; /* Do not use the rcu tag. */
+ enum maple_type type;
+ int i;
+
+ if (((unsigned long)enode_a & MAPLE_NODE_MASK) !=
+ ((unsigned long)enode_b & MAPLE_NODE_MASK)) {
+ pr_err("The lower 8 bits of enode are different.\n");
+ return -1;
+ }
+
+ type = mte_node_type(enode_a);
+ node_a = mte_to_node(enode_a);
+ node_b = mte_to_node(enode_b);
+ a = *node_a;
+ b = *node_b;
+
+ /* Do not compare addresses. */
+ if (ma_is_root(node_a) || ma_is_root(node_b)) {
+ a.parent = (struct maple_pnode *)((unsigned long)a.parent &
+ MA_ROOT_PARENT);
+ b.parent = (struct maple_pnode *)((unsigned long)b.parent &
+ MA_ROOT_PARENT);
+ } else {
+ a.parent = (struct maple_pnode *)((unsigned long)a.parent &
+ MAPLE_NODE_MASK);
+ b.parent = (struct maple_pnode *)((unsigned long)b.parent &
+ MAPLE_NODE_MASK);
+ }
+
+ if (a.parent != b.parent) {
+ pr_err("The lower 8 bits of parents are different. %p %p\n",
+ a.parent, b.parent);
+ return -1;
+ }
+
+ /*
+ * If it is a leaf node, the slots do not contain the node address, and
+ * no special processing of slots is required.
+ */
+ if (ma_is_leaf(type))
+ goto cmp;
+
+ slots_a = ma_slots(&a, type);
+ slots_b = ma_slots(&b, type);
+
+ for (i = 0; i < mt_slots[type]; i++) {
+ if (!slots_a[i] && !slots_b[i])
+ break;
+
+ if (!slots_a[i] || !slots_b[i]) {
+ pr_err("The number of slots is different.\n");
+ return -1;
+ }
+
+ /* Do not compare addresses in slots. */
+ ((unsigned long *)slots_a)[i] &= MAPLE_NODE_MASK;
+ ((unsigned long *)slots_b)[i] &= MAPLE_NODE_MASK;
+ }
+
+cmp:
+ /*
+ * Compare all contents of two nodes, including parent (except address),
+ * slots (except address), pivots, gaps and metadata.
+ */
+ return memcmp(&a, &b, sizeof(struct maple_node));
+}
+
+/*
+ * Compare two trees and return 0 if they are the same, non-zero otherwise.
+ */
+static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b)
+{
+ MA_STATE(mas_a, mt_a, 0, 0);
+ MA_STATE(mas_b, mt_b, 0, 0);
+
+ if (mt_a->ma_flags != mt_b->ma_flags) {
+ pr_err("The flags of the two trees are different.\n");
+ return -1;
+ }
+
+ mas_dfs_preorder(&mas_a);
+ mas_dfs_preorder(&mas_b);
+
+ if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) {
+ if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) {
+ pr_err("One is ma_root and the other is not.\n");
+ return -1;
+ }
+ return 0;
+ }
+
+ while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) {
+
+ if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) {
+ pr_err("One is ma_none and the other is not.\n");
+ return -1;
+ }
+
+ if (mas_a.min != mas_b.min ||
+ mas_a.max != mas_b.max) {
+ pr_err("mas->min, mas->max do not match.\n");
+ return -1;
+ }
+
+ if (compare_node(mas_a.node, mas_b.node)) {
+ pr_err("The contents of nodes %p and %p are different.\n",
+ mas_a.node, mas_b.node);
+ mt_dump(mt_a, mt_dump_dec);
+ mt_dump(mt_b, mt_dump_dec);
+ return -1;
+ }
+
+ mas_dfs_preorder(&mas_a);
+ mas_dfs_preorder(&mas_b);
+ }
+
+ return 0;
+}
+
+static __init void mas_subtree_max_range(struct ma_state *mas)
+{
+ unsigned long limit = mas->max;
+ MA_STATE(newmas, mas->tree, 0, 0);
+ void *entry;
+
+ mas_for_each(mas, entry, limit) {
+ if (mas->last - mas->index >=
+ newmas.last - newmas.index) {
+ newmas = *mas;
+ }
+ }
+
+ *mas = newmas;
+}
+
+/*
+ * build_full_tree() - Build a full tree.
+ * @mt: The tree to build.
+ * @flags: Use @flags to build the tree.
+ * @height: The height of the tree to build.
+ *
+ * Build a tree with full leaf nodes and internal nodes. Note that the height
+ * should not exceed 3, otherwise it will take a long time to build.
+ * Return: zero if the build is successful, non-zero if it fails.
+ */
+static __init int build_full_tree(struct maple_tree *mt, unsigned int flags,
+ int height)
+{
+ MA_STATE(mas, mt, 0, 0);
+ unsigned long step;
+ int ret = 0, cnt = 1;
+ enum maple_type type;
+
+ mt_init_flags(mt, flags);
+ mtree_insert_range(mt, 0, ULONG_MAX, xa_mk_value(5), GFP_KERNEL);
+
+ mtree_lock(mt);
+
+ while (1) {
+ mas_set(&mas, 0);
+ if (mt_height(mt) < height) {
+ mas.max = ULONG_MAX;
+ goto store;
+ }
+
+ while (1) {
+ mas_dfs_preorder(&mas);
+ if (mas_is_none(&mas))
+ goto unlock;
+
+ type = mte_node_type(mas.node);
+ if (mas_data_end(&mas) + 1 < mt_slots[type]) {
+ mas_set(&mas, mas.min);
+ goto store;
+ }
+ }
+store:
+ mas_subtree_max_range(&mas);
+ step = mas.last - mas.index;
+ if (step < 1) {
+ ret = -1;
+ goto unlock;
+ }
+
+ step /= 2;
+ mas.last = mas.index + step;
+ mas_store_gfp(&mas, xa_mk_value(5),
+ GFP_KERNEL);
+ ++cnt;
+ }
+unlock:
+ mtree_unlock(mt);
+
+ MT_BUG_ON(mt, mt_height(mt) != height);
+ /* pr_info("height:%u number of elements:%d\n", mt_height(mt), cnt); */
+ return ret;
+}
+
+static noinline void __init check_mtree_dup(struct maple_tree *mt)
+{
+ DEFINE_MTREE(new);
+ int i, j, ret, count = 0;
+ unsigned int rand_seed = 17, rand;
+
+ /* store a value at [0, 0] */
+ mt_init_flags(mt, 0);
+ mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL);
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* The two trees have different attributes. */
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret != -EINVAL);
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* The new tree is not empty */
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, 0);
+ mtree_store(&new, 5, xa_mk_value(5), GFP_KERNEL);
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret != -EINVAL);
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* Test for duplicating full trees. */
+ for (i = 1; i <= 3; i++) {
+ ret = build_full_tree(mt, 0, i);
+ MT_BUG_ON(mt, ret);
+ mt_init_flags(&new, 0);
+
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ for (i = 1; i <= 3; i++) {
+ ret = build_full_tree(mt, MT_FLAGS_ALLOC_RANGE, i);
+ MT_BUG_ON(mt, ret);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ /* Test for normal duplicating. */
+ for (i = 0; i < 1000; i += 3) {
+ if (i & 1) {
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, 0);
+ } else {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ }
+
+ for (j = 0; j < i; j++) {
+ mtree_store_range(mt, j * 10, j * 10 + 5,
+ xa_mk_value(j), GFP_KERNEL);
+ }
+
+ ret = mtree_dup(mt, &new, GFP_KERNEL);
+ MT_BUG_ON(&new, ret);
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ /* Test memory allocation failed. */
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ for (i = 0; i < 30; i += 3) {
+ mtree_store_range(mt, j * 10, j * 10 + 5,
+ xa_mk_value(j), GFP_KERNEL);
+ }
+
+ /* Failed at the first node. */
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ mt_set_non_kernel(0);
+ ret = mtree_dup(mt, &new, GFP_NOWAIT);
+ mt_set_non_kernel(0);
+ MT_BUG_ON(&new, ret != -ENOMEM);
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+
+ /* Random maple tree fails at a random node. */
+ for (i = 0; i < 1000; i += 3) {
+ if (i & 1) {
+ mt_init_flags(mt, 0);
+ mt_init_flags(&new, 0);
+ } else {
+ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE);
+ mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE);
+ }
+
+ for (j = 0; j < i; j++) {
+ mtree_store_range(mt, j * 10, j * 10 + 5,
+ xa_mk_value(j), GFP_KERNEL);
+ }
+ /*
+ * The rand() library function is not used, so we can generate
+ * the same random numbers on any platform.
+ */
+ rand_seed = rand_seed * 1103515245 + 12345;
+ rand = rand_seed / 65536 % 128;
+ mt_set_non_kernel(rand);
+
+ ret = mtree_dup(mt, &new, GFP_NOWAIT);
+ mt_set_non_kernel(0);
+ if (ret != 0) {
+ MT_BUG_ON(&new, ret != -ENOMEM);
+ count++;
+ mtree_destroy(mt);
+ continue;
+ }
+
+ mt_validate(&new);
+ if (compare_tree(mt, &new))
+ MT_BUG_ON(&new, 1);
+
+ mtree_destroy(mt);
+ mtree_destroy(&new);
+ }
+
+ /* pr_info("mtree_dup() fail %d times\n", count); */
+ BUG_ON(!count);
+}
+
extern void test_kmem_cache_bulk(void);
void farmer_tests(void)
@@ -35904,6 +36268,10 @@ void farmer_tests(void)
check_null_expand(&tree);
mtree_destroy(&tree);
+ mt_init_flags(&tree, 0);
+ check_mtree_dup(&tree);
+ mtree_destroy(&tree);
+
/* RCU testing */
mt_init_flags(&tree, 0);
check_erase_testset(&tree);
@@ -35938,7 +36306,9 @@ void farmer_tests(void)
void maple_tree_tests(void)
{
+#if !defined(BENCH)
farmer_tests();
+#endif
maple_tree_seed();
maple_tree_harvest();
}
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index c99d2adaca3f..47fdaa146443 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -50,9 +50,9 @@ static int get_zswap_stored_pages(size_t *value)
return read_int("/sys/kernel/debug/zswap/stored_pages", value);
}
-static int get_zswap_written_back_pages(size_t *value)
+static int get_cg_wb_count(const char *cg)
{
- return read_int("/sys/kernel/debug/zswap/written_back_pages", value);
+ return cg_read_key_long(cg, "memory.stat", "zswp_wb");
}
static long get_zswpout(const char *cgroup)
@@ -73,6 +73,24 @@ static int allocate_bytes(const char *cgroup, void *arg)
return 0;
}
+static char *setup_test_group_1M(const char *root, const char *name)
+{
+ char *group_name = cg_name(root, name);
+
+ if (!group_name)
+ return NULL;
+ if (cg_create(group_name))
+ goto fail;
+ if (cg_write(group_name, "memory.max", "1M")) {
+ cg_destroy(group_name);
+ goto fail;
+ }
+ return group_name;
+fail:
+ free(group_name);
+ return NULL;
+}
+
/*
* Sanity test to check that pages are written into zswap.
*/
@@ -117,43 +135,51 @@ out:
/*
* When trying to store a memcg page in zswap, if the memcg hits its memory
- * limit in zswap, writeback should not be triggered.
- *
- * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may
- * not zswap"). Needs to be revised when a per memcg writeback mechanism is
- * implemented.
+ * limit in zswap, writeback should affect only the zswapped pages of that
+ * memcg.
*/
static int test_no_invasive_cgroup_shrink(const char *root)
{
- size_t written_back_before, written_back_after;
int ret = KSFT_FAIL;
- char *test_group;
+ size_t control_allocation_size = MB(10);
+ char *control_allocation, *wb_group = NULL, *control_group = NULL;
/* Set up */
- test_group = cg_name(root, "no_shrink_test");
- if (!test_group)
- goto out;
- if (cg_create(test_group))
+ wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
+ if (!wb_group)
+ return KSFT_FAIL;
+ if (cg_write(wb_group, "memory.zswap.max", "10K"))
goto out;
- if (cg_write(test_group, "memory.max", "1M"))
+ control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
+ if (!control_group)
goto out;
- if (cg_write(test_group, "memory.zswap.max", "10K"))
+
+ /* Push some test_group2 memory into zswap */
+ if (cg_enter_current(control_group))
goto out;
- if (get_zswap_written_back_pages(&written_back_before))
+ control_allocation = malloc(control_allocation_size);
+ for (int i = 0; i < control_allocation_size; i += 4095)
+ control_allocation[i] = 'a';
+ if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
goto out;
- /* Allocate 10x memory.max to push memory into zswap */
- if (cg_run(test_group, allocate_bytes, (void *)MB(10)))
+ /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
+ if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
goto out;
- /* Verify that no writeback happened because of the memcg allocation */
- if (get_zswap_written_back_pages(&written_back_after))
- goto out;
- if (written_back_after == written_back_before)
+ /* Verify that only zswapped memory from gwb_group has been written back */
+ if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
ret = KSFT_PASS;
out:
- cg_destroy(test_group);
- free(test_group);
+ cg_enter_current(root);
+ if (control_group) {
+ cg_destroy(control_group);
+ free(control_group);
+ }
+ cg_destroy(wb_group);
+ free(wb_group);
+ if (control_allocation)
+ free(control_allocation);
return ret;
}
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index b71247ba7196..8a1cc2bf1864 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -2,6 +2,7 @@
# Makefile for damon selftests
TEST_GEN_FILES += huge_count_read_write
+TEST_GEN_FILES += access_memory
TEST_FILES = _chk_dependency.sh _debugfs_common.sh
TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
@@ -9,6 +10,8 @@ TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
TEST_PROGS += debugfs_duplicate_context_creation.sh
TEST_PROGS += debugfs_rm_non_contexts.sh
TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh
+TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
+TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
TEST_PROGS += reclaim.sh lru_sort.sh
include ../lib.mk
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
new file mode 100644
index 000000000000..e98cf4b6a4b7
--- /dev/null
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -0,0 +1,322 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+
+sysfs_root = '/sys/kernel/mm/damon/admin'
+
+def write_file(path, string):
+ "Returns error string if failed, or None otherwise"
+ string = '%s' % string
+ try:
+ with open(path, 'w') as f:
+ f.write(string)
+ except Exception as e:
+ return '%s' % e
+ return None
+
+def read_file(path):
+ '''Returns the read content and error string. The read content is None if
+ the reading failed'''
+ try:
+ with open(path, 'r') as f:
+ return f.read(), None
+ except Exception as e:
+ return None, '%s' % e
+
+class DamosAccessPattern:
+ size = None
+ nr_accesses = None
+ age = None
+ scheme = None
+
+ def __init__(self, size=None, nr_accesses=None, age=None):
+ self.size = size
+ self.nr_accesses = nr_accesses
+ self.age = age
+
+ if self.size == None:
+ self.size = [0, 2**64 - 1]
+ if self.nr_accesses == None:
+ self.nr_accesses = [0, 2**64 - 1]
+ if self.age == None:
+ self.age = [0, 2**64 - 1]
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'access_pattern')
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0])
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1])
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'),
+ self.nr_accesses[0])
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'),
+ self.nr_accesses[1])
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0])
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1])
+ if err != None:
+ return err
+
+class Damos:
+ action = None
+ access_pattern = None
+ # todo: Support quotas, watermarks, stats, tried_regions
+ idx = None
+ context = None
+ tried_bytes = None
+
+ def __init__(self, action='stat', access_pattern=DamosAccessPattern()):
+ self.action = action
+ self.access_pattern = access_pattern
+ self.access_pattern.scheme = self
+
+ def sysfs_dir(self):
+ return os.path.join(
+ self.context.sysfs_dir(), 'schemes', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action)
+ if err != None:
+ return err
+ err = self.access_pattern.stage()
+ if err != None:
+ return err
+
+ # disable quotas
+ err = write_file(os.path.join(self.sysfs_dir(), 'quotas', 'ms'), '0')
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'quotas', 'bytes'), '0')
+ if err != None:
+ return err
+
+ # disable watermarks
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none')
+ if err != None:
+ return err
+
+ # disable filters
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0')
+ if err != None:
+ return err
+
+class DamonTarget:
+ pid = None
+ # todo: Support target regions if test is made
+ idx = None
+ context = None
+
+ def __init__(self, pid):
+ self.pid = pid
+
+ def sysfs_dir(self):
+ return os.path.join(
+ self.context.sysfs_dir(), 'targets', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
+ if err != None:
+ return err
+ return write_file(
+ os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
+
+class DamonAttrs:
+ sample_us = None
+ aggr_us = None
+ update_us = None
+ min_nr_regions = None
+ max_nr_regions = None
+ context = None
+
+ def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000,
+ min_nr_regions=10, max_nr_regions=1000):
+ self.sample_us = sample_us
+ self.aggr_us = aggr_us
+ self.update_us = update_us
+ self.min_nr_regions = min_nr_regions
+ self.max_nr_regions = max_nr_regions
+
+ def interval_sysfs_dir(self):
+ return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs',
+ 'intervals')
+
+ def nr_regions_range_sysfs_dir(self):
+ return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs',
+ 'nr_regions')
+
+ def stage(self):
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'),
+ self.sample_us)
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'),
+ self.aggr_us)
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'),
+ self.update_us)
+ if err != None:
+ return err
+
+ err = write_file(
+ os.path.join(self.nr_regions_range_sysfs_dir(), 'min'),
+ self.min_nr_regions)
+ if err != None:
+ return err
+
+ err = write_file(
+ os.path.join(self.nr_regions_range_sysfs_dir(), 'max'),
+ self.max_nr_regions)
+ if err != None:
+ return err
+
+class DamonCtx:
+ ops = None
+ monitoring_attrs = None
+ targets = None
+ schemes = None
+ kdamond = None
+ idx = None
+
+ def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[],
+ schemes=[]):
+ self.ops = ops
+ self.monitoring_attrs = monitoring_attrs
+ self.monitoring_attrs.context = self
+
+ self.targets = targets
+ for idx, target in enumerate(self.targets):
+ target.idx = idx
+ target.context = self
+
+ self.schemes = schemes
+ for idx, scheme in enumerate(self.schemes):
+ scheme.idx = idx
+ scheme.context = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.kdamond.sysfs_dir(), 'contexts',
+ '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'operations'), self.ops)
+ if err != None:
+ return err
+ err = self.monitoring_attrs.stage()
+ if err != None:
+ return err
+
+ nr_targets_file = os.path.join(
+ self.sysfs_dir(), 'targets', 'nr_targets')
+ content, err = read_file(nr_targets_file)
+ if err != None:
+ return err
+ if int(content) != len(self.targets):
+ err = write_file(nr_targets_file, '%d' % len(self.targets))
+ if err != None:
+ return err
+ for target in self.targets:
+ err = target.stage()
+ if err != None:
+ return err
+
+ nr_schemes_file = os.path.join(
+ self.sysfs_dir(), 'schemes', 'nr_schemes')
+ content, err = read_file(nr_schemes_file)
+ if int(content) != len(self.schemes):
+ err = write_file(nr_schemes_file, '%d' % len(self.schemes))
+ if err != None:
+ return err
+ for scheme in self.schemes:
+ err = scheme.stage()
+ if err != None:
+ return err
+ return None
+
+class Kdamond:
+ state = None
+ pid = None
+ contexts = None
+ idx = None # index of this kdamond between siblings
+ kdamonds = None # parent
+
+ def __init__(self, contexts=[]):
+ self.contexts = contexts
+ for idx, context in enumerate(self.contexts):
+ context.idx = idx
+ context.kdamond = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx)
+
+ def start(self):
+ nr_contexts_file = os.path.join(self.sysfs_dir(),
+ 'contexts', 'nr_contexts')
+ content, err = read_file(nr_contexts_file)
+ if err != None:
+ return err
+ if int(content) != len(self.contexts):
+ err = write_file(nr_contexts_file, '%d' % len(self.contexts))
+ if err != None:
+ return err
+
+ for context in self.contexts:
+ err = context.stage()
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on')
+ return err
+
+ def update_schemes_tried_bytes(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_tried_bytes')
+ if err != None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ content, err = read_file(os.path.join(scheme.sysfs_dir(),
+ 'tried_regions', 'total_bytes'))
+ if err != None:
+ return err
+ scheme.tried_bytes = int(content)
+
+class Kdamonds:
+ kdamonds = []
+
+ def __init__(self, kdamonds=[]):
+ self.kdamonds = kdamonds
+ for idx, kdamond in enumerate(self.kdamonds):
+ kdamond.idx = idx
+ kdamond.kdamonds = self
+
+ def sysfs_dir(self):
+ return os.path.join(sysfs_root, 'kdamonds')
+
+ def start(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'),
+ '%s' % len(self.kdamonds))
+ if err != None:
+ return err
+ for kdamond in self.kdamonds:
+ err = kdamond.start()
+ if err != None:
+ return err
+ return None
diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c
new file mode 100644
index 000000000000..585a2fa54329
--- /dev/null
+++ b/tools/testing/selftests/damon/access_memory.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Artificial memory access program for testing DAMON.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+int main(int argc, char *argv[])
+{
+ char **regions;
+ clock_t start_clock;
+ int nr_regions;
+ int sz_region;
+ int access_time_ms;
+ int i;
+
+ if (argc != 4) {
+ printf("Usage: %s <number> <size (bytes)> <time (ms)>\n",
+ argv[0]);
+ return -1;
+ }
+
+ nr_regions = atoi(argv[1]);
+ sz_region = atoi(argv[2]);
+ access_time_ms = atoi(argv[3]);
+
+ regions = malloc(sizeof(*regions) * nr_regions);
+ for (i = 0; i < nr_regions; i++)
+ regions[i] = malloc(sz_region);
+
+ for (i = 0; i < nr_regions; i++) {
+ start_clock = clock();
+ while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC <
+ access_time_ms)
+ memset(regions[i], i, 1024 * 1024 * 10);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 56f0230a8b92..e9a976d296e2 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -150,6 +150,32 @@ test_weights()
ensure_file "$weights_dir/age_permil" "exist" "600"
}
+test_goal()
+{
+ goal_dir=$1
+ ensure_dir "$goal_dir" "exist"
+ ensure_file "$goal_dir/target_value" "exist" "600"
+ ensure_file "$goal_dir/current_value" "exist" "600"
+}
+
+test_goals()
+{
+ goals_dir=$1
+ ensure_dir "$goals_dir" "exist"
+ ensure_file "$goals_dir/nr_goals" "exist" "600"
+
+ ensure_write_succ "$goals_dir/nr_goals" "1" "valid input"
+ test_goal "$goals_dir/0"
+
+ ensure_write_succ "$goals_dir/nr_goals" "2" "valid input"
+ test_goal "$goals_dir/0"
+ test_goal "$goals_dir/1"
+
+ ensure_write_succ "$goals_dir/nr_goals" "0" "valid input"
+ ensure_dir "$goals_dir/0" "not_exist"
+ ensure_dir "$goals_dir/1" "not_exist"
+}
+
test_quotas()
{
quotas_dir=$1
@@ -158,6 +184,7 @@ test_quotas()
ensure_file "$quotas_dir/bytes" "exist" 600
ensure_file "$quotas_dir/reset_interval_ms" "exist" 600
test_weights "$quotas_dir/weights"
+ test_goals "$quotas_dir/goals"
}
test_access_pattern()
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
new file mode 100644
index 000000000000..8c690ba1a573
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ proc = subprocess.Popen(['sleep', '2'])
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ nr_accesses=[200, 200]))] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdmaond start failed: %s' % err)
+ exit(1)
+
+ while proc.poll() == None:
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
new file mode 100644
index 000000000000..cdbf19b442c9
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]))] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdmaond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+
+ wss_collected.append(
+ kdamonds.kdamonds[0].contexts[0].schemes[0].tried_bytes)
+
+ wss_collected.sort()
+ acceptable_error_rate = 0.2
+ for percentile in [50, 75]:
+ sample = wss_collected[int(len(wss_collected) * percentile / 100)]
+ error_rate = abs(sample - sz_region) / sz_region
+ print('%d-th percentile (%d) error %f' %
+ (percentile, sample, error_rate))
+ if error_rate > acceptable_error_rate:
+ print('the error rate is not acceptable (> %f)' %
+ acceptable_error_rate)
+ print('samples are as below')
+ print('\n'.join(['%d' % wss for wss in wss_collected]))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index dede0bcf97a3..2453add65d12 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -117,8 +117,8 @@ TEST_FILES += va_high_addr_switch.sh
include ../lib.mk
-$(TEST_GEN_PROGS): vm_util.c
-$(TEST_GEN_FILES): vm_util.c
+$(TEST_GEN_PROGS): vm_util.c thp_settings.c
+$(TEST_GEN_FILES): vm_util.c thp_settings.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 9b420140ba2b..656afba02dbc 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -33,7 +33,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
FILE *cmdfile = popen(cmd, "r");
if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
- perror("Failed to read meminfo\n");
+ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno));
return -1;
}
@@ -44,7 +44,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
cmdfile = popen(cmd, "r");
if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
- perror("Failed to read meminfo\n");
+ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno));
return -1;
}
@@ -62,14 +62,14 @@ int prereq(void)
fd = open("/proc/sys/vm/compact_unevictable_allowed",
O_RDONLY | O_NONBLOCK);
if (fd < 0) {
- perror("Failed to open\n"
- "/proc/sys/vm/compact_unevictable_allowed\n");
+ ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n",
+ strerror(errno));
return -1;
}
if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
- perror("Failed to read from\n"
- "/proc/sys/vm/compact_unevictable_allowed\n");
+ ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n",
+ strerror(errno));
close(fd);
return -1;
}
@@ -78,12 +78,13 @@ int prereq(void)
if (allowed == '1')
return 0;
+ ksft_print_msg("Compaction isn't allowed\n");
return -1;
}
int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
{
- int fd;
+ int fd, ret = -1;
int compaction_index = 0;
char initial_nr_hugepages[10] = {0};
char nr_hugepages[10] = {0};
@@ -94,18 +95,21 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
if (fd < 0) {
- perror("Failed to open /proc/sys/vm/nr_hugepages");
+ ksft_test_result_fail("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
return -1;
}
if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
- perror("Failed to read from /proc/sys/vm/nr_hugepages");
+ ksft_test_result_fail("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
/* Start with the initial condition of 0 huge pages*/
if (write(fd, "0", sizeof(char)) != sizeof(char)) {
- perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n");
+ ksft_test_result_fail("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
@@ -114,14 +118,16 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
/* Request a large number of huge pages. The Kernel will allocate
as much as it can */
if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n");
+ ksft_test_result_fail("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
lseek(fd, 0, SEEK_SET);
if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
- perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n");
+ ksft_test_result_fail("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
@@ -129,67 +135,58 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
huge pages */
compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
- if (compaction_index > 3) {
- printf("No of huge pages allocated = %d\n",
- (atoi(nr_hugepages)));
- fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
- "as huge pages\n", compaction_index);
- goto close_fd;
- }
-
- printf("No of huge pages allocated = %d\n",
- (atoi(nr_hugepages)));
-
lseek(fd, 0, SEEK_SET);
if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
!= strlen(initial_nr_hugepages)) {
- perror("Failed to write value to /proc/sys/vm/nr_hugepages\n");
+ ksft_test_result_fail("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
- close(fd);
- return 0;
+ if (compaction_index > 3) {
+ ksft_print_msg("ERROR: Less that 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ ksft_test_result_fail("No of huge pages allocated = %d\n", (atoi(nr_hugepages)));
+ goto close_fd;
+ }
+
+ ksft_test_result_pass("Memory compaction succeeded. No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+ ret = 0;
close_fd:
close(fd);
- printf("Not OK. Compaction test failed.");
- return -1;
+ return ret;
}
int main(int argc, char **argv)
{
struct rlimit lim;
- struct map_list *list, *entry;
+ struct map_list *list = NULL, *entry;
size_t page_size, i;
void *map = NULL;
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
long mem_fragmentable_MB = 0;
- if (prereq() != 0) {
- printf("Either the sysctl compact_unevictable_allowed is not\n"
- "set to 1 or couldn't read the proc file.\n"
- "Skipping the test\n");
- return KSFT_SKIP;
- }
+ ksft_print_header();
+
+ if (prereq() || geteuid())
+ return ksft_exit_pass();
+
+ ksft_set_plan(1);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
- if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
- perror("Failed to set rlimit:\n");
- return -1;
- }
+ if (setrlimit(RLIMIT_MEMLOCK, &lim))
+ ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno));
page_size = getpagesize();
- list = NULL;
-
- if (read_memory_info(&mem_free, &hugepage_size) != 0) {
- printf("ERROR: Cannot read meminfo\n");
- return -1;
- }
+ if (read_memory_info(&mem_free, &hugepage_size) != 0)
+ ksft_exit_fail_msg("Failed to get meminfo\n");
mem_fragmentable_MB = mem_free * 0.8 / 1024;
@@ -225,7 +222,7 @@ int main(int argc, char **argv)
}
if (check_compaction(mem_free, hugepage_size) == 0)
- return 0;
+ return ksft_exit_pass();
- return -1;
+ return ksft_exit_fail();
}
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 6f2f83990441..363bf5f801be 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -29,15 +29,49 @@
#include "../../../../mm/gup_test.h"
#include "../kselftest.h"
#include "vm_util.h"
+#include "thp_settings.h"
static size_t pagesize;
static int pagemap_fd;
-static size_t thpsize;
+static size_t pmdsize;
+static int nr_thpsizes;
+static size_t thpsizes[20];
static int nr_hugetlbsizes;
static size_t hugetlbsizes[10];
static int gup_fd;
static bool has_huge_zeropage;
+static int sz2ord(size_t size)
+{
+ return __builtin_ctzll(size / pagesize);
+}
+
+static int detect_thp_sizes(size_t sizes[], int max)
+{
+ int count = 0;
+ unsigned long orders;
+ size_t kb;
+ int i;
+
+ /* thp not supported at all. */
+ if (!pmdsize)
+ return 0;
+
+ orders = 1UL << sz2ord(pmdsize);
+ orders |= thp_supported_orders();
+
+ for (i = 0; orders && count < max; i++) {
+ if (!(orders & (1UL << i)))
+ continue;
+ orders &= ~(1UL << i);
+ kb = (pagesize >> 10) << i;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
+ }
+
+ return count;
+}
+
static void detect_huge_zeropage(void)
{
int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
@@ -734,7 +768,7 @@ enum thp_run {
THP_RUN_PARTIAL_SHARED,
};
-static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
+static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
{
char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
size_t size, mmap_size, mremap_size;
@@ -759,11 +793,11 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
}
/*
- * Try to populate a THP. Touch the first sub-page and test if we get
- * another sub-page populated automatically.
+ * Try to populate a THP. Touch the first sub-page and test if
+ * we get the last sub-page populated automatically.
*/
mem[0] = 0;
- if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
ksft_test_result_skip("Did not get a THP populated\n");
goto munmap;
}
@@ -773,12 +807,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
switch (thp_run) {
case THP_RUN_PMD:
case THP_RUN_PMD_SWAPOUT:
+ assert(thpsize == pmdsize);
break;
case THP_RUN_PTE:
case THP_RUN_PTE_SWAPOUT:
/*
* Trigger PTE-mapping the THP by temporarily mapping a single
- * subpage R/O.
+ * subpage R/O. This is a noop if the THP is not pmdsize (and
+ * therefore already PTE-mapped).
*/
ret = mprotect(mem + pagesize, pagesize, PROT_READ);
if (ret) {
@@ -875,52 +911,60 @@ munmap:
munmap(mremap_mem, mremap_size);
}
-static void run_with_thp(test_fn fn, const char *desc)
+static void run_with_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with THP\n", desc);
- do_run_with_thp(fn, THP_RUN_PMD);
+ ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PMD, size);
}
-static void run_with_thp_swap(test_fn fn, const char *desc)
+static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
- do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
+ ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
}
-static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
+static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
- do_run_with_thp(fn, THP_RUN_PTE);
+ ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PTE, size);
}
-static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
+static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
- do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
+ ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
}
-static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
+static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
- do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
+ ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
}
-static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
+static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
- do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
+ ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
}
-static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
+static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
- do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
+ ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
}
-static void run_with_partial_shared_thp(test_fn fn, const char *desc)
+static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
{
- ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
- do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
+ ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
}
static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
@@ -1091,15 +1135,27 @@ static void run_anon_test_case(struct test_case const *test_case)
run_with_base_page(test_case->fn, test_case->desc);
run_with_base_page_swap(test_case->fn, test_case->desc);
- if (thpsize) {
- run_with_thp(test_case->fn, test_case->desc);
- run_with_thp_swap(test_case->fn, test_case->desc);
- run_with_pte_mapped_thp(test_case->fn, test_case->desc);
- run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
- run_with_single_pte_of_thp(test_case->fn, test_case->desc);
- run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
- run_with_partial_mremap_thp(test_case->fn, test_case->desc);
- run_with_partial_shared_thp(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_thpsizes; i++) {
+ size_t size = thpsizes[i];
+ struct thp_settings settings = *thp_current_settings();
+
+ settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
+ settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+ thp_push_settings(&settings);
+
+ if (size == pmdsize) {
+ run_with_thp(test_case->fn, test_case->desc, size);
+ run_with_thp_swap(test_case->fn, test_case->desc, size);
+ }
+
+ run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
+ run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
+ run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
+ run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
+ run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
+ run_with_partial_shared_thp(test_case->fn, test_case->desc, size);
+
+ thp_pop_settings();
}
for (i = 0; i < nr_hugetlbsizes; i++)
run_with_hugetlb(test_case->fn, test_case->desc,
@@ -1120,8 +1176,9 @@ static int tests_per_anon_test_case(void)
{
int tests = 2 + nr_hugetlbsizes;
- if (thpsize)
- tests += 8;
+ tests += 6 * nr_thpsizes;
+ if (pmdsize)
+ tests += 2;
return tests;
}
@@ -1329,7 +1386,7 @@ static void run_anon_thp_test_cases(void)
{
int i;
- if (!thpsize)
+ if (!pmdsize)
return;
ksft_print_msg("[INFO] Anonymous THP tests\n");
@@ -1338,13 +1395,13 @@ static void run_anon_thp_test_cases(void)
struct test_case const *test_case = &anon_thp_test_cases[i];
ksft_print_msg("[RUN] %s\n", test_case->desc);
- do_run_with_thp(test_case->fn, THP_RUN_PMD);
+ do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
}
}
static int tests_per_anon_thp_test_case(void)
{
- return thpsize ? 1 : 0;
+ return pmdsize ? 1 : 0;
}
typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
@@ -1419,7 +1476,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
}
/* For alignment purposes, we need twice the thp size. */
- mmap_size = 2 * thpsize;
+ mmap_size = 2 * pmdsize;
mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mmap_mem == MAP_FAILED) {
@@ -1434,11 +1491,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
}
/* We need a THP-aligned memory area. */
- mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
- smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));
+ mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
+ smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
- ret = madvise(mem, thpsize, MADV_HUGEPAGE);
- ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
+ ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
+ ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
if (ret) {
ksft_test_result_fail("MADV_HUGEPAGE failed\n");
goto munmap;
@@ -1457,7 +1514,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
goto munmap;
}
- fn(mem, smem, thpsize);
+ fn(mem, smem, pmdsize);
munmap:
munmap(mmap_mem, mmap_size);
if (mmap_smem != MAP_FAILED)
@@ -1650,7 +1707,7 @@ static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
run_with_zeropage(test_case->fn, test_case->desc);
run_with_memfd(test_case->fn, test_case->desc);
run_with_tmpfile(test_case->fn, test_case->desc);
- if (thpsize)
+ if (pmdsize)
run_with_huge_zeropage(test_case->fn, test_case->desc);
for (i = 0; i < nr_hugetlbsizes; i++)
run_with_memfd_hugetlb(test_case->fn, test_case->desc,
@@ -1671,7 +1728,7 @@ static int tests_per_non_anon_test_case(void)
{
int tests = 3 + nr_hugetlbsizes;
- if (thpsize)
+ if (pmdsize)
tests += 1;
return tests;
}
@@ -1679,14 +1736,23 @@ static int tests_per_non_anon_test_case(void)
int main(int argc, char **argv)
{
int err;
+ struct thp_settings default_settings;
ksft_print_header();
pagesize = getpagesize();
- thpsize = read_pmd_pagesize();
- if (thpsize)
- ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
- thpsize / 1024);
+ pmdsize = read_pmd_pagesize();
+ if (pmdsize) {
+ /* Only if THP is supported. */
+ thp_read_settings(&default_settings);
+ default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
+ thp_save_settings();
+ thp_push_settings(&default_settings);
+
+ ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
+ pmdsize / 1024);
+ nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
+ }
nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
ARRAY_SIZE(hugetlbsizes));
detect_huge_zeropage();
@@ -1704,6 +1770,11 @@ int main(int argc, char **argv)
run_anon_thp_test_cases();
run_non_anon_test_cases();
+ if (pmdsize) {
+ /* Only if THP is supported. */
+ thp_restore_settings();
+ }
+
err = ksft_get_fail_cnt();
if (err)
ksft_exit_fail_msg("%d out of %d tests failed\n",
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index ec2229136384..cbe99594d319 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -50,39 +50,41 @@ static char *cmd_to_str(unsigned long cmd)
void *gup_thread(void *data)
{
struct gup_test gup = *(struct gup_test *)data;
- int i;
+ int i, status;
/* Only report timing information on the *_BENCHMARK commands: */
if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
(cmd == PIN_LONGTERM_BENCHMARK)) {
for (i = 0; i < repeats; i++) {
gup.size = size;
- if (ioctl(gup_fd, cmd, &gup))
- perror("ioctl"), exit(1);
+ status = ioctl(gup_fd, cmd, &gup);
+ if (status)
+ break;
pthread_mutex_lock(&print_mutex);
- printf("%s: Time: get:%lld put:%lld us",
- cmd_to_str(cmd), gup.get_delta_usec,
- gup.put_delta_usec);
+ ksft_print_msg("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
if (gup.size != size)
- printf(", truncated (size: %lld)", gup.size);
- printf("\n");
+ ksft_print_msg(", truncated (size: %lld)", gup.size);
+ ksft_print_msg("\n");
pthread_mutex_unlock(&print_mutex);
}
} else {
gup.size = size;
- if (ioctl(gup_fd, cmd, &gup)) {
- perror("ioctl");
- exit(1);
- }
+ status = ioctl(gup_fd, cmd, &gup);
+ if (status)
+ goto return_;
pthread_mutex_lock(&print_mutex);
- printf("%s: done\n", cmd_to_str(cmd));
+ ksft_print_msg("%s: done\n", cmd_to_str(cmd));
if (gup.size != size)
- printf("Truncated (size: %lld)\n", gup.size);
+ ksft_print_msg("Truncated (size: %lld)\n", gup.size);
pthread_mutex_unlock(&print_mutex);
}
+return_:
+ ksft_test_result(!status, "ioctl status %d\n", status);
return NULL;
}
@@ -170,7 +172,7 @@ int main(int argc, char **argv)
touch = 1;
break;
default:
- return -1;
+ ksft_exit_fail_msg("Wrong argument\n");
}
}
@@ -198,11 +200,12 @@ int main(int argc, char **argv)
}
}
+ ksft_print_header();
+ ksft_set_plan(nthreads);
+
filed = open(file, O_RDWR|O_CREAT);
- if (filed < 0) {
- perror("open");
- exit(filed);
- }
+ if (filed < 0)
+ ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno));
gup.nr_pages_per_call = nr_pages;
if (write)
@@ -213,27 +216,24 @@ int main(int argc, char **argv)
switch (errno) {
case EACCES:
if (getuid())
- printf("Please run this test as root\n");
+ ksft_print_msg("Please run this test as root\n");
break;
case ENOENT:
- if (opendir("/sys/kernel/debug") == NULL) {
- printf("mount debugfs at /sys/kernel/debug\n");
- break;
- }
- printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
+ if (opendir("/sys/kernel/debug") == NULL)
+ ksft_print_msg("mount debugfs at /sys/kernel/debug\n");
+ ksft_print_msg("check if CONFIG_GUP_TEST is enabled in kernel config\n");
break;
default:
- perror("failed to open " GUP_TEST_FILE);
+ ksft_print_msg("failed to open %s: %s\n", GUP_TEST_FILE, strerror(errno));
break;
}
- exit(KSFT_SKIP);
+ ksft_test_result_skip("Please run this test as root\n");
+ return ksft_exit_pass();
}
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
- if (p == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (p == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
gup.addr = (unsigned long)p;
if (thp == 1)
@@ -264,7 +264,8 @@ int main(int argc, char **argv)
ret = pthread_join(tid[i], NULL);
assert(ret == 0);
}
+
free(tid);
- return 0;
+ return ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c
index 955ef87f382c..267eea2e0e0b 100644
--- a/tools/testing/selftests/mm/hugepage-mmap.c
+++ b/tools/testing/selftests/mm/hugepage-mmap.c
@@ -22,6 +22,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "../kselftest.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
@@ -37,7 +38,7 @@
static void check_bytes(char *addr)
{
- printf("First hex is %x\n", *((unsigned int *)addr));
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
}
static void write_bytes(char *addr)
@@ -55,7 +56,7 @@ static int read_bytes(char *addr)
check_bytes(addr);
for (i = 0; i < LENGTH; i++)
if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
+ ksft_print_msg("Error: Mismatch at %lu\n", i);
return 1;
}
return 0;
@@ -66,20 +67,20 @@ int main(void)
void *addr;
int fd, ret;
+ ksft_print_header();
+ ksft_set_plan(1);
+
fd = memfd_create("hugepage-mmap", MFD_HUGETLB);
- if (fd < 0) {
- perror("memfd_create() failed");
- exit(1);
- }
+ if (fd < 0)
+ ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno));
addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
if (addr == MAP_FAILED) {
- perror("mmap");
close(fd);
- exit(1);
+ ksft_exit_fail_msg("mmap(): %s\n", strerror(errno));
}
- printf("Returned address is %p\n", addr);
+ ksft_print_msg("Returned address is %p\n", addr);
check_bytes(addr);
write_bytes(addr);
ret = read_bytes(addr);
@@ -87,5 +88,7 @@ int main(void)
munmap(addr, LENGTH);
close(fd);
- return ret;
+ ksft_test_result(!ret, "Read same data\n");
+
+ ksft_exit(!ret);
}
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index cabd0084f57b..c463d1c09c9b 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -24,6 +24,7 @@
#include <sys/ioctl.h>
#include <string.h>
#include <stdbool.h>
+#include "../kselftest.h"
#include "vm_util.h"
#define DEFAULT_LENGTH_MB 10UL
@@ -34,7 +35,7 @@
static void check_bytes(char *addr)
{
- printf("First hex is %x\n", *((unsigned int *)addr));
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
}
static void write_bytes(char *addr, size_t len)
@@ -52,7 +53,7 @@ static int read_bytes(char *addr, size_t len)
check_bytes(addr);
for (i = 0; i < len; i++)
if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
+ ksft_print_msg("Mismatch at %lu\n", i);
return 1;
}
return 0;
@@ -66,17 +67,13 @@ static void register_region_with_uffd(char *addr, size_t len)
/* Create and enable userfaultfd object. */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1) {
- perror("userfaultfd");
- exit(1);
- }
+ if (uffd == -1)
+ ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno));
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
- perror("ioctl-UFFDIO_API");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
+ ksft_exit_fail_msg("ioctl-UFFDIO_API: %s\n", strerror(errno));
/* Create a private anonymous mapping. The memory will be
* demand-zero paged--that is, not yet allocated. When we
@@ -86,21 +83,17 @@ static void register_region_with_uffd(char *addr, size_t len)
addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
- printf("Address returned by mmap() = %p\n", addr);
+ ksft_print_msg("Address returned by mmap() = %p\n", addr);
/* Register the memory range of the mapping we just created for
* handling by the userfaultfd object. In mode, we request to track
* missing pages (i.e., pages that have not yet been faulted in).
*/
- if (uffd_register(uffd, addr, len, true, false, false)) {
- perror("ioctl-UFFDIO_REGISTER");
- exit(1);
- }
+ if (uffd_register(uffd, addr, len, true, false, false))
+ ksft_exit_fail_msg("ioctl-UFFDIO_REGISTER: %s\n", strerror(errno));
}
int main(int argc, char *argv[])
@@ -108,10 +101,11 @@ int main(int argc, char *argv[])
size_t length = 0;
int ret = 0, fd;
- if (argc >= 2 && !strcmp(argv[1], "-h")) {
- printf("Usage: %s [length_in_MB]\n", argv[0]);
- exit(1);
- }
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (argc >= 2 && !strcmp(argv[1], "-h"))
+ ksft_exit_fail_msg("Usage: %s [length_in_MB]\n", argv[0]);
/* Read memory length as the first arg if valid, otherwise fallback to
* the default length.
@@ -123,50 +117,40 @@ int main(int argc, char *argv[])
length = MB_TO_BYTES(length);
fd = memfd_create(argv[0], MFD_HUGETLB);
- if (fd < 0) {
- perror("Open failed");
- exit(1);
- }
+ if (fd < 0)
+ ksft_exit_fail_msg("Open failed: %s\n", strerror(errno));
/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
unsigned long suggested_addr = 0x7eaa40000000;
void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
- printf("Map haddr: Returned address is %p\n", haddr);
- if (haddr == MAP_FAILED) {
- perror("mmap1");
- exit(1);
- }
+ ksft_print_msg("Map haddr: Returned address is %p\n", haddr);
+ if (haddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap1: %s\n", strerror(errno));
/* mmap again to a dummy address to hopefully trigger pmd sharing. */
suggested_addr = 0x7daa40000000;
void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
- printf("Map daddr: Returned address is %p\n", daddr);
- if (daddr == MAP_FAILED) {
- perror("mmap3");
- exit(1);
- }
+ ksft_print_msg("Map daddr: Returned address is %p\n", daddr);
+ if (daddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap3: %s\n", strerror(errno));
suggested_addr = 0x7faa40000000;
void *vaddr =
mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
- printf("Map vaddr: Returned address is %p\n", vaddr);
- if (vaddr == MAP_FAILED) {
- perror("mmap2");
- exit(1);
- }
+ ksft_print_msg("Map vaddr: Returned address is %p\n", vaddr);
+ if (vaddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap2: %s\n", strerror(errno));
register_region_with_uffd(haddr, length);
void *addr = mremap(haddr, length, length,
MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
- if (addr == MAP_FAILED) {
- perror("mremap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mremap: %s\n", strerror(errno));
- printf("Mremap: Returned address is %p\n", addr);
+ ksft_print_msg("Mremap: Returned address is %p\n", addr);
check_bytes(addr);
write_bytes(addr, length);
ret = read_bytes(addr, length);
@@ -174,12 +158,11 @@ int main(int argc, char *argv[])
munmap(addr, length);
addr = mremap(addr, length, length, 0);
- if (addr != MAP_FAILED) {
- printf("mremap: Expected failure, but call succeeded\n");
- exit(1);
- }
+ if (addr != MAP_FAILED)
+ ksft_exit_fail_msg("mremap: Expected failure, but call succeeded\n");
close(fd);
- return ret;
+ ksft_test_result(!ret, "Read same data\n");
+ ksft_exit(!ret);
}
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c
index 030667cb5533..829320a519e7 100644
--- a/tools/testing/selftests/mm/khugepaged.c
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -22,13 +22,14 @@
#include "linux/magic.h"
#include "vm_util.h"
+#include "thp_settings.h"
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
static unsigned long page_size;
static int hpage_pmd_nr;
+static int anon_order;
-#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
#define TEST_FILE "collapse_test_file"
@@ -71,78 +72,7 @@ struct file_info {
};
static struct file_info finfo;
-
-enum thp_enabled {
- THP_ALWAYS,
- THP_MADVISE,
- THP_NEVER,
-};
-
-static const char *thp_enabled_strings[] = {
- "always",
- "madvise",
- "never",
- NULL
-};
-
-enum thp_defrag {
- THP_DEFRAG_ALWAYS,
- THP_DEFRAG_DEFER,
- THP_DEFRAG_DEFER_MADVISE,
- THP_DEFRAG_MADVISE,
- THP_DEFRAG_NEVER,
-};
-
-static const char *thp_defrag_strings[] = {
- "always",
- "defer",
- "defer+madvise",
- "madvise",
- "never",
- NULL
-};
-
-enum shmem_enabled {
- SHMEM_ALWAYS,
- SHMEM_WITHIN_SIZE,
- SHMEM_ADVISE,
- SHMEM_NEVER,
- SHMEM_DENY,
- SHMEM_FORCE,
-};
-
-static const char *shmem_enabled_strings[] = {
- "always",
- "within_size",
- "advise",
- "never",
- "deny",
- "force",
- NULL
-};
-
-struct khugepaged_settings {
- bool defrag;
- unsigned int alloc_sleep_millisecs;
- unsigned int scan_sleep_millisecs;
- unsigned int max_ptes_none;
- unsigned int max_ptes_swap;
- unsigned int max_ptes_shared;
- unsigned long pages_to_scan;
-};
-
-struct settings {
- enum thp_enabled thp_enabled;
- enum thp_defrag thp_defrag;
- enum shmem_enabled shmem_enabled;
- bool use_zero_page;
- struct khugepaged_settings khugepaged;
- unsigned long read_ahead_kb;
-};
-
-static struct settings saved_settings;
static bool skip_settings_restore;
-
static int exit_status;
static void success(const char *msg)
@@ -161,260 +91,34 @@ static void skip(const char *msg)
printf(" \e[33m%s\e[0m\n", msg);
}
-static int read_file(const char *path, char *buf, size_t buflen)
-{
- int fd;
- ssize_t numread;
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return 0;
-
- numread = read(fd, buf, buflen - 1);
- if (numread < 1) {
- close(fd);
- return 0;
- }
-
- buf[numread] = '\0';
- close(fd);
-
- return (unsigned int) numread;
-}
-
-static int write_file(const char *path, const char *buf, size_t buflen)
-{
- int fd;
- ssize_t numwritten;
-
- fd = open(path, O_WRONLY);
- if (fd == -1) {
- printf("open(%s)\n", path);
- exit(EXIT_FAILURE);
- return 0;
- }
-
- numwritten = write(fd, buf, buflen - 1);
- close(fd);
- if (numwritten < 1) {
- printf("write(%s)\n", buf);
- exit(EXIT_FAILURE);
- return 0;
- }
-
- return (unsigned int) numwritten;
-}
-
-static int read_string(const char *name, const char *strings[])
+static void restore_settings_atexit(void)
{
- char path[PATH_MAX];
- char buf[256];
- char *c;
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- if (!read_file(path, buf, sizeof(buf))) {
- perror(path);
- exit(EXIT_FAILURE);
- }
-
- c = strchr(buf, '[');
- if (!c) {
- printf("%s: Parse failure\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- c++;
- memmove(buf, c, sizeof(buf) - (c - buf));
-
- c = strchr(buf, ']');
- if (!c) {
- printf("%s: Parse failure\n", __func__);
- exit(EXIT_FAILURE);
- }
- *c = '\0';
-
- ret = 0;
- while (strings[ret]) {
- if (!strcmp(strings[ret], buf))
- return ret;
- ret++;
- }
-
- printf("Failed to parse %s\n", name);
- exit(EXIT_FAILURE);
-}
-
-static void write_string(const char *name, const char *val)
-{
- char path[PATH_MAX];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- if (!write_file(path, val, strlen(val) + 1)) {
- perror(path);
- exit(EXIT_FAILURE);
- }
-}
-
-static const unsigned long _read_num(const char *path)
-{
- char buf[21];
-
- if (read_file(path, buf, sizeof(buf)) < 0) {
- perror("read_file(read_num)");
- exit(EXIT_FAILURE);
- }
-
- return strtoul(buf, NULL, 10);
-}
-
-static const unsigned long read_num(const char *name)
-{
- char path[PATH_MAX];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- return _read_num(path);
-}
-
-static void _write_num(const char *path, unsigned long num)
-{
- char buf[21];
-
- sprintf(buf, "%ld", num);
- if (!write_file(path, buf, strlen(buf) + 1)) {
- perror(path);
- exit(EXIT_FAILURE);
- }
-}
-
-static void write_num(const char *name, unsigned long num)
-{
- char path[PATH_MAX];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- _write_num(path, num);
-}
-
-static void write_settings(struct settings *settings)
-{
- struct khugepaged_settings *khugepaged = &settings->khugepaged;
-
- write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
- write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
- write_string("shmem_enabled",
- shmem_enabled_strings[settings->shmem_enabled]);
- write_num("use_zero_page", settings->use_zero_page);
-
- write_num("khugepaged/defrag", khugepaged->defrag);
- write_num("khugepaged/alloc_sleep_millisecs",
- khugepaged->alloc_sleep_millisecs);
- write_num("khugepaged/scan_sleep_millisecs",
- khugepaged->scan_sleep_millisecs);
- write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
- write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
- write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
- write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
-
- if (file_ops && finfo.type == VMA_FILE)
- _write_num(finfo.dev_queue_read_ahead_path,
- settings->read_ahead_kb);
-}
-
-#define MAX_SETTINGS_DEPTH 4
-static struct settings settings_stack[MAX_SETTINGS_DEPTH];
-static int settings_index;
-
-static struct settings *current_settings(void)
-{
- if (!settings_index) {
- printf("Fail: No settings set");
- exit(EXIT_FAILURE);
- }
- return settings_stack + settings_index - 1;
-}
+ if (skip_settings_restore)
+ return;
-static void push_settings(struct settings *settings)
-{
- if (settings_index >= MAX_SETTINGS_DEPTH) {
- printf("Fail: Settings stack exceeded");
- exit(EXIT_FAILURE);
- }
- settings_stack[settings_index++] = *settings;
- write_settings(current_settings());
-}
+ printf("Restore THP and khugepaged settings...");
+ thp_restore_settings();
+ success("OK");
-static void pop_settings(void)
-{
- if (settings_index <= 0) {
- printf("Fail: Settings stack empty");
- exit(EXIT_FAILURE);
- }
- --settings_index;
- write_settings(current_settings());
+ skip_settings_restore = true;
}
static void restore_settings(int sig)
{
- if (skip_settings_restore)
- goto out;
-
- printf("Restore THP and khugepaged settings...");
- write_settings(&saved_settings);
- success("OK");
- if (sig)
- exit(EXIT_FAILURE);
-out:
- exit(exit_status);
+ /* exit() will invoke the restore_settings_atexit handler. */
+ exit(sig ? EXIT_FAILURE : exit_status);
}
static void save_settings(void)
{
printf("Save THP and khugepaged settings...");
- saved_settings = (struct settings) {
- .thp_enabled = read_string("enabled", thp_enabled_strings),
- .thp_defrag = read_string("defrag", thp_defrag_strings),
- .shmem_enabled =
- read_string("shmem_enabled", shmem_enabled_strings),
- .use_zero_page = read_num("use_zero_page"),
- };
- saved_settings.khugepaged = (struct khugepaged_settings) {
- .defrag = read_num("khugepaged/defrag"),
- .alloc_sleep_millisecs =
- read_num("khugepaged/alloc_sleep_millisecs"),
- .scan_sleep_millisecs =
- read_num("khugepaged/scan_sleep_millisecs"),
- .max_ptes_none = read_num("khugepaged/max_ptes_none"),
- .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
- .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
- .pages_to_scan = read_num("khugepaged/pages_to_scan"),
- };
if (file_ops && finfo.type == VMA_FILE)
- saved_settings.read_ahead_kb =
- _read_num(finfo.dev_queue_read_ahead_path);
+ thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path);
+ thp_save_settings();
success("OK");
+ atexit(restore_settings_atexit);
signal(SIGTERM, restore_settings);
signal(SIGINT, restore_settings);
signal(SIGHUP, restore_settings);
@@ -793,7 +497,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
struct mem_ops *ops, bool expect)
{
int ret;
- struct settings settings = *current_settings();
+ struct thp_settings settings = *thp_current_settings();
printf("%s...", msg);
@@ -803,7 +507,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
*/
settings.thp_enabled = THP_NEVER;
settings.shmem_enabled = SHMEM_NEVER;
- push_settings(&settings);
+ thp_push_settings(&settings);
/* Clear VM_NOHUGEPAGE */
madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
@@ -815,7 +519,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
else
success("OK");
- pop_settings();
+ thp_pop_settings();
}
static void madvise_collapse(const char *msg, char *p, int nr_hpages,
@@ -845,13 +549,13 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
/* Wait until the second full_scan completed */
- full_scans = read_num("khugepaged/full_scans") + 2;
+ full_scans = thp_read_num("khugepaged/full_scans") + 2;
printf("%s...", msg);
while (timeout--) {
if (ops->check_huge(p, nr_hpages))
break;
- if (read_num("khugepaged/full_scans") >= full_scans)
+ if (thp_read_num("khugepaged/full_scans") >= full_scans)
break;
printf(".");
usleep(TICK);
@@ -904,13 +608,18 @@ static bool is_tmpfs(struct mem_ops *ops)
return ops == &__file_ops && finfo.type == VMA_SHMEM;
}
+static bool is_anon(struct mem_ops *ops)
+{
+ return ops == &__anon_ops;
+}
+
static void alloc_at_fault(void)
{
- struct settings settings = *current_settings();
+ struct thp_settings settings = *thp_current_settings();
char *p;
settings.thp_enabled = THP_ALWAYS;
- push_settings(&settings);
+ thp_push_settings(&settings);
p = alloc_mapping(1);
*p = 1;
@@ -920,7 +629,7 @@ static void alloc_at_fault(void)
else
fail("Fail");
- pop_settings();
+ thp_pop_settings();
madvise(p, page_size, MADV_DONTNEED);
printf("Split huge PMD on MADV_DONTNEED...");
@@ -968,11 +677,12 @@ static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops
static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_none = hpage_pmd_nr / 2;
- struct settings settings = *current_settings();
+ struct thp_settings settings = *thp_current_settings();
void *p;
+ int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1;
settings.khugepaged.max_ptes_none = max_ptes_none;
- push_settings(&settings);
+ thp_push_settings(&settings);
p = ops->setup_area(1);
@@ -983,10 +693,10 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o
goto skip;
}
- ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
ops, !c->enforce_pte_scan_limits);
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
if (c->enforce_pte_scan_limits) {
ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
@@ -997,7 +707,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o
}
skip:
ops->cleanup_area(p, hpage_pmd_size);
- pop_settings();
+ thp_pop_settings();
}
static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
@@ -1028,7 +738,7 @@ out:
static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
{
- int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
+ int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap");
void *p;
p = ops->setup_area(1);
@@ -1245,11 +955,11 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o
fail("Fail");
ops->fault(p, 0, page_size);
- write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
+ thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
c->collapse("Collapse PTE table full of compound pages in child",
p, 1, ops, true);
- write_num("khugepaged/max_ptes_shared",
- current_settings()->khugepaged.max_ptes_shared);
+ thp_write_num("khugepaged/max_ptes_shared",
+ thp_current_settings()->khugepaged.max_ptes_shared);
validate_memory(p, 0, hpage_pmd_size);
ops->cleanup_area(p, hpage_pmd_size);
@@ -1270,7 +980,7 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o
static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
{
- int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
+ int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared");
int wstatus;
void *p;
@@ -1373,7 +1083,7 @@ static void madvise_retracted_page_tables(struct collapse_context *c,
static void usage(void)
{
- fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n");
+ fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] <test type> [dir]\n\n");
fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
@@ -1382,15 +1092,34 @@ static void usage(void)
fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n");
+ fprintf(stderr, "\n\tSupported Options:\n");
+ fprintf(stderr, "\t\t-h: This help message.\n");
+ fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n");
+ fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n");
exit(1);
}
-static void parse_test_type(int argc, const char **argv)
+static void parse_test_type(int argc, char **argv)
{
+ int opt;
char *buf;
const char *token;
- if (argc == 1) {
+ while ((opt = getopt(argc, argv, "s:h")) != -1) {
+ switch (opt) {
+ case 's':
+ anon_order = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ usage();
+ }
+ }
+
+ argv += optind;
+ argc -= optind;
+
+ if (argc == 0) {
/* Backwards compatibility */
khugepaged_context = &__khugepaged_context;
madvise_context = &__madvise_context;
@@ -1398,7 +1127,7 @@ static void parse_test_type(int argc, const char **argv)
return;
}
- buf = strdup(argv[1]);
+ buf = strdup(argv[0]);
token = strsep(&buf, ":");
if (!strcmp(token, "all")) {
@@ -1432,13 +1161,16 @@ static void parse_test_type(int argc, const char **argv)
if (!file_ops)
return;
- if (argc != 3)
+ if (argc != 2)
usage();
+
+ get_finfo(argv[1]);
}
-int main(int argc, const char **argv)
+int main(int argc, char **argv)
{
- struct settings default_settings = {
+ int hpage_pmd_order;
+ struct thp_settings default_settings = {
.thp_enabled = THP_MADVISE,
.thp_defrag = THP_DEFRAG_ALWAYS,
.shmem_enabled = SHMEM_ADVISE,
@@ -1460,9 +1192,6 @@ int main(int argc, const char **argv)
parse_test_type(argc, argv);
- if (file_ops)
- get_finfo(argv[2]);
-
setbuf(stdout, NULL);
page_size = getpagesize();
@@ -1472,14 +1201,17 @@ int main(int argc, const char **argv)
exit(EXIT_FAILURE);
}
hpage_pmd_nr = hpage_pmd_size / page_size;
+ hpage_pmd_order = __builtin_ctz(hpage_pmd_nr);
default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
+ default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT;
+ default_settings.hugepages[anon_order].enabled = THP_ALWAYS;
save_settings();
- push_settings(&default_settings);
+ thp_push_settings(&default_settings);
alloc_at_fault();
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 00757445278e..246d53a5d7f2 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -5,6 +5,7 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+count_total=0
count_pass=0
count_fail=0
count_skip=0
@@ -17,6 +18,7 @@ usage: ${BASH_SOURCE[0]:-$0} [ options ]
-a: run all tests, including extra ones
-t: specify specific categories to tests to run
-h: display this message
+ -n: disable TAP output
The default behavior is to run required tests only. If -a is specified,
will run all tests.
@@ -77,12 +79,14 @@ EOF
}
RUN_ALL=false
+TAP_PREFIX="# "
-while getopts "aht:" OPT; do
+while getopts "aht:n" OPT; do
case ${OPT} in
"a") RUN_ALL=true ;;
"h") usage ;;
"t") VM_SELFTEST_ITEMS=${OPTARG} ;;
+ "n") TAP_PREFIX= ;;
esac
done
shift $((OPTIND -1))
@@ -184,30 +188,52 @@ fi
VADDR64=0
echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1
+tap_prefix() {
+ sed -e "s/^/${TAP_PREFIX}/"
+}
+
+tap_output() {
+ if [[ ! -z "$TAP_PREFIX" ]]; then
+ read str
+ echo $str
+ fi
+}
+
+pretty_name() {
+ echo "$*" | sed -e 's/^\(bash \)\?\.\///'
+}
+
# Usage: run_test [test binary] [arbitrary test arguments...]
run_test() {
if test_selected ${CATEGORY}; then
+ local test=$(pretty_name "$*")
local title="running $*"
local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
- printf "%s\n%s\n%s\n" "$sep" "$title" "$sep"
+ printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix
- "$@"
- local ret=$?
+ ("$@" 2>&1) | tap_prefix
+ local ret=${PIPESTATUS[0]}
+ count_total=$(( count_total + 1 ))
if [ $ret -eq 0 ]; then
count_pass=$(( count_pass + 1 ))
- echo "[PASS]"
+ echo "[PASS]" | tap_prefix
+ echo "ok ${count_total} ${test}" | tap_output
elif [ $ret -eq $ksft_skip ]; then
count_skip=$(( count_skip + 1 ))
- echo "[SKIP]"
+ echo "[SKIP]" | tap_prefix
+ echo "ok ${count_total} ${test} # SKIP" | tap_output
exitcode=$ksft_skip
else
count_fail=$(( count_fail + 1 ))
- echo "[FAIL]"
+ echo "[FAIL]" | tap_prefix
+ echo "not ok ${count_total} ${test} # exit=$ret" | tap_output
exitcode=1
fi
fi # test_selected
}
+echo "TAP version 13" | tap_output
+
CATEGORY="hugetlb" run_test ./hugepage-mmap
shmmax=$(cat /proc/sys/kernel/shmmax)
@@ -231,9 +257,9 @@ CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
if test_selected "hugetlb"; then
- echo "NOTE: These hugetlb tests provide minimal coverage. Use"
- echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
- echo " hugetlb regression testing."
+ echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix
+ echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" | tap_prefix
+ echo " hugetlb regression testing." | tap_prefix
fi
CATEGORY="mmap" run_test ./map_fixed_noreplace
@@ -312,7 +338,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
-echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
+(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
CATEGORY="memfd_secret" run_test ./memfd_secret
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
@@ -334,8 +360,6 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
CATEGORY="ksm" run_test ./ksm_functional_tests
-run_test ./ksm_functional_tests
-
# protection_keys tests
if [ -x ./protection_keys_32 ]
then
@@ -359,6 +383,8 @@ CATEGORY="cow" run_test ./cow
CATEGORY="thp" run_test ./khugepaged
+CATEGORY="thp" run_test ./khugepaged -s 2
+
CATEGORY="thp" run_test ./transhuge-stress -d 20
CATEGORY="thp" run_test ./split_huge_page_test
@@ -369,6 +395,7 @@ CATEGORY="mkdirty" run_test ./mkdirty
CATEGORY="mdwe" run_test ./mdwe_test
-echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}"
+echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
+echo "1..${count_total}" | tap_output
exit $exitcode
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c
new file mode 100644
index 000000000000..a4163438108e
--- /dev/null
+++ b/tools/testing/selftests/mm/thp_settings.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "thp_settings.h"
+
+#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
+#define MAX_SETTINGS_DEPTH 4
+static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH];
+static int settings_index;
+static struct thp_settings saved_settings;
+static char dev_queue_read_ahead_path[PATH_MAX];
+
+static const char * const thp_enabled_strings[] = {
+ "never",
+ "always",
+ "inherit",
+ "madvise",
+ NULL
+};
+
+static const char * const thp_defrag_strings[] = {
+ "always",
+ "defer",
+ "defer+madvise",
+ "madvise",
+ "never",
+ NULL
+};
+
+static const char * const shmem_enabled_strings[] = {
+ "always",
+ "within_size",
+ "advise",
+ "never",
+ "deny",
+ "force",
+ NULL
+};
+
+int read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+int write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1) {
+ printf("open(%s)\n", path);
+ exit(EXIT_FAILURE);
+ return 0;
+ }
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1) {
+ printf("write(%s)\n", buf);
+ exit(EXIT_FAILURE);
+ return 0;
+ }
+
+ return (unsigned int) numwritten;
+}
+
+const unsigned long read_num(const char *path)
+{
+ char buf[21];
+
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file()");
+ exit(EXIT_FAILURE);
+ }
+
+ return strtoul(buf, NULL, 10);
+}
+
+void write_num(const char *path, unsigned long num)
+{
+ char buf[21];
+
+ sprintf(buf, "%ld", num);
+ if (!write_file(path, buf, strlen(buf) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+int thp_read_string(const char *name, const char * const strings[])
+{
+ char path[PATH_MAX];
+ char buf[256];
+ char *c;
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!read_file(path, buf, sizeof(buf))) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+
+ c = strchr(buf, '[');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ c++;
+ memmove(buf, c, sizeof(buf) - (c - buf));
+
+ c = strchr(buf, ']');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ *c = '\0';
+
+ ret = 0;
+ while (strings[ret]) {
+ if (!strcmp(strings[ret], buf))
+ return ret;
+ ret++;
+ }
+
+ printf("Failed to parse %s\n", name);
+ exit(EXIT_FAILURE);
+}
+
+void thp_write_string(const char *name, const char *val)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!write_file(path, val, strlen(val) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+const unsigned long thp_read_num(const char *name)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return read_num(path);
+}
+
+void thp_write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ write_num(path, num);
+}
+
+void thp_read_settings(struct thp_settings *settings)
+{
+ unsigned long orders = thp_supported_orders();
+ char path[PATH_MAX];
+ int i;
+
+ *settings = (struct thp_settings) {
+ .thp_enabled = thp_read_string("enabled", thp_enabled_strings),
+ .thp_defrag = thp_read_string("defrag", thp_defrag_strings),
+ .shmem_enabled =
+ thp_read_string("shmem_enabled", shmem_enabled_strings),
+ .use_zero_page = thp_read_num("use_zero_page"),
+ };
+ settings->khugepaged = (struct khugepaged_settings) {
+ .defrag = thp_read_num("khugepaged/defrag"),
+ .alloc_sleep_millisecs =
+ thp_read_num("khugepaged/alloc_sleep_millisecs"),
+ .scan_sleep_millisecs =
+ thp_read_num("khugepaged/scan_sleep_millisecs"),
+ .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"),
+ .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"),
+ .max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"),
+ .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"),
+ };
+ if (dev_queue_read_ahead_path[0])
+ settings->read_ahead_kb = read_num(dev_queue_read_ahead_path);
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & orders)) {
+ settings->hugepages[i].enabled = THP_NEVER;
+ continue;
+ }
+ snprintf(path, PATH_MAX, "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ settings->hugepages[i].enabled =
+ thp_read_string(path, thp_enabled_strings);
+ }
+}
+
+void thp_write_settings(struct thp_settings *settings)
+{
+ struct khugepaged_settings *khugepaged = &settings->khugepaged;
+ unsigned long orders = thp_supported_orders();
+ char path[PATH_MAX];
+ int enabled;
+ int i;
+
+ thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
+ thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
+ thp_write_string("shmem_enabled",
+ shmem_enabled_strings[settings->shmem_enabled]);
+ thp_write_num("use_zero_page", settings->use_zero_page);
+
+ thp_write_num("khugepaged/defrag", khugepaged->defrag);
+ thp_write_num("khugepaged/alloc_sleep_millisecs",
+ khugepaged->alloc_sleep_millisecs);
+ thp_write_num("khugepaged/scan_sleep_millisecs",
+ khugepaged->scan_sleep_millisecs);
+ thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
+ thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
+ thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
+ thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+
+ if (dev_queue_read_ahead_path[0])
+ write_num(dev_queue_read_ahead_path, settings->read_ahead_kb);
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & orders))
+ continue;
+ snprintf(path, PATH_MAX, "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ enabled = settings->hugepages[i].enabled;
+ thp_write_string(path, thp_enabled_strings[enabled]);
+ }
+}
+
+struct thp_settings *thp_current_settings(void)
+{
+ if (!settings_index) {
+ printf("Fail: No settings set");
+ exit(EXIT_FAILURE);
+ }
+ return settings_stack + settings_index - 1;
+}
+
+void thp_push_settings(struct thp_settings *settings)
+{
+ if (settings_index >= MAX_SETTINGS_DEPTH) {
+ printf("Fail: Settings stack exceeded");
+ exit(EXIT_FAILURE);
+ }
+ settings_stack[settings_index++] = *settings;
+ thp_write_settings(thp_current_settings());
+}
+
+void thp_pop_settings(void)
+{
+ if (settings_index <= 0) {
+ printf("Fail: Settings stack empty");
+ exit(EXIT_FAILURE);
+ }
+ --settings_index;
+ thp_write_settings(thp_current_settings());
+}
+
+void thp_restore_settings(void)
+{
+ thp_write_settings(&saved_settings);
+}
+
+void thp_save_settings(void)
+{
+ thp_read_settings(&saved_settings);
+}
+
+void thp_set_read_ahead_path(char *path)
+{
+ if (!path) {
+ dev_queue_read_ahead_path[0] = '\0';
+ return;
+ }
+
+ strncpy(dev_queue_read_ahead_path, path,
+ sizeof(dev_queue_read_ahead_path));
+ dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0';
+}
+
+unsigned long thp_supported_orders(void)
+{
+ unsigned long orders = 0;
+ char path[PATH_MAX];
+ char buf[256];
+ int ret;
+ int i;
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = read_file(path, buf, sizeof(buf));
+ if (ret)
+ orders |= 1UL << i;
+ }
+
+ return orders;
+}
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h
new file mode 100644
index 000000000000..71cbff05f4c7
--- /dev/null
+++ b/tools/testing/selftests/mm/thp_settings.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __THP_SETTINGS_H__
+#define __THP_SETTINGS_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+enum thp_enabled {
+ THP_NEVER,
+ THP_ALWAYS,
+ THP_INHERIT,
+ THP_MADVISE,
+};
+
+enum thp_defrag {
+ THP_DEFRAG_ALWAYS,
+ THP_DEFRAG_DEFER,
+ THP_DEFRAG_DEFER_MADVISE,
+ THP_DEFRAG_MADVISE,
+ THP_DEFRAG_NEVER,
+};
+
+enum shmem_enabled {
+ SHMEM_ALWAYS,
+ SHMEM_WITHIN_SIZE,
+ SHMEM_ADVISE,
+ SHMEM_NEVER,
+ SHMEM_DENY,
+ SHMEM_FORCE,
+};
+
+#define NR_ORDERS 20
+
+struct hugepages_settings {
+ enum thp_enabled enabled;
+};
+
+struct khugepaged_settings {
+ bool defrag;
+ unsigned int alloc_sleep_millisecs;
+ unsigned int scan_sleep_millisecs;
+ unsigned int max_ptes_none;
+ unsigned int max_ptes_swap;
+ unsigned int max_ptes_shared;
+ unsigned long pages_to_scan;
+};
+
+struct thp_settings {
+ enum thp_enabled thp_enabled;
+ enum thp_defrag thp_defrag;
+ enum shmem_enabled shmem_enabled;
+ bool use_zero_page;
+ struct khugepaged_settings khugepaged;
+ unsigned long read_ahead_kb;
+ struct hugepages_settings hugepages[NR_ORDERS];
+};
+
+int read_file(const char *path, char *buf, size_t buflen);
+int write_file(const char *path, const char *buf, size_t buflen);
+const unsigned long read_num(const char *path);
+void write_num(const char *path, unsigned long num);
+
+int thp_read_string(const char *name, const char * const strings[]);
+void thp_write_string(const char *name, const char *val);
+const unsigned long thp_read_num(const char *name);
+void thp_write_num(const char *name, unsigned long num);
+
+void thp_write_settings(struct thp_settings *settings);
+void thp_read_settings(struct thp_settings *settings);
+struct thp_settings *thp_current_settings(void);
+void thp_push_settings(struct thp_settings *settings);
+void thp_pop_settings(void);
+void thp_restore_settings(void);
+void thp_save_settings(void);
+
+void thp_set_read_ahead_path(char *path);
+unsigned long thp_supported_orders(void);
+
+#endif /* __THP_SETTINGS_H__ */
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 16ed4dfa7359..622987f12c89 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -3,7 +3,8 @@
Before running this huge pages for each huge page size must have been
reserved.
- For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+ For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must
+ be used.
Also shmmax must be increased.
And you need to run as root to work around some weird permissions in shm.
And nothing using huge pages should run in parallel.
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 02b89860e193..b0ac0ec2356d 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -17,6 +17,7 @@ bool map_shared;
bool test_uffdio_wp = true;
unsigned long long *count_verify;
uffd_test_ops_t *uffd_test_ops;
+uffd_test_case_ops_t *uffd_test_case_ops;
static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
{
@@ -262,7 +263,7 @@ static inline void munmap_area(void **area)
*area = NULL;
}
-static void uffd_test_ctx_clear(void)
+void uffd_test_ctx_clear(void)
{
size_t i;
@@ -298,7 +299,11 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg)
unsigned long nr, cpu;
int ret;
- uffd_test_ctx_clear();
+ if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) {
+ ret = uffd_test_case_ops->pre_alloc(errmsg);
+ if (ret)
+ return ret;
+ }
ret = uffd_test_ops->allocate_area((void **)&area_src, true);
ret |= uffd_test_ops->allocate_area((void **)&area_dst, false);
@@ -308,6 +313,12 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg)
return ret;
}
+ if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) {
+ ret = uffd_test_case_ops->post_alloc(errmsg);
+ if (ret)
+ return ret;
+ }
+
ret = userfaultfd_open(&features);
if (ret) {
if (errmsg)
@@ -620,6 +631,30 @@ int copy_page(int ufd, unsigned long offset, bool wp)
return __copy_page(ufd, offset, false, wp);
}
+int move_page(int ufd, unsigned long offset, unsigned long len)
+{
+ struct uffdio_move uffdio_move;
+
+ if (offset + len > nr_pages * page_size)
+ err("unexpected offset %lu and length %lu\n", offset, len);
+ uffdio_move.dst = (unsigned long) area_dst + offset;
+ uffdio_move.src = (unsigned long) area_src + offset;
+ uffdio_move.len = len;
+ uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES;
+ uffdio_move.move = 0;
+ if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) {
+ /* real retval in uffdio_move.move */
+ if (uffdio_move.move != -EEXIST)
+ err("UFFDIO_MOVE error: %"PRId64,
+ (int64_t)uffdio_move.move);
+ wake_range(ufd, uffdio_move.dst, len);
+ } else if (uffdio_move.move != len) {
+ err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move);
+ } else
+ return 1;
+ return 0;
+}
+
int uffd_open_dev(unsigned int flags)
{
int fd, uffd;
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index 7c4fa964c3b0..cb055282c89c 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -90,6 +90,12 @@ struct uffd_test_ops {
};
typedef struct uffd_test_ops uffd_test_ops_t;
+struct uffd_test_case_ops {
+ int (*pre_alloc)(const char **errmsg);
+ int (*post_alloc)(const char **errmsg);
+};
+typedef struct uffd_test_case_ops uffd_test_case_ops_t;
+
extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
extern int uffd, uffd_flags, finished, *pipefd, test_type;
@@ -102,15 +108,18 @@ extern uffd_test_ops_t anon_uffd_test_ops;
extern uffd_test_ops_t shmem_uffd_test_ops;
extern uffd_test_ops_t hugetlb_uffd_test_ops;
extern uffd_test_ops_t *uffd_test_ops;
+extern uffd_test_case_ops_t *uffd_test_case_ops;
void uffd_stats_report(struct uffd_args *args, int n_cpus);
int uffd_test_ctx_init(uint64_t features, const char **errmsg);
+void uffd_test_ctx_clear(void);
int userfaultfd_open(uint64_t *features);
int uffd_read_msg(int ufd, struct uffd_msg *msg);
void wp_range(int ufd, __u64 start, __u64 len, bool wp);
void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args);
int __copy_page(int ufd, unsigned long offset, bool retry, bool wp);
int copy_page(int ufd, unsigned long offset, bool wp);
+int move_page(int ufd, unsigned long offset, unsigned long len);
void *uffd_poll_thread(void *arg);
int uffd_open_dev(unsigned int flags);
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index 469e0476af26..7e83829bbb33 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -323,8 +323,10 @@ static int userfaultfd_stress(void)
uffd_stats_reset(args, nr_cpus);
/* bounce pass */
- if (stress(args))
+ if (stress(args)) {
+ uffd_test_ctx_clear();
return 1;
+ }
/* Clear all the write protections if there is any */
if (test_uffdio_wp)
@@ -354,6 +356,7 @@ static int userfaultfd_stress(void)
uffd_stats_report(args, nr_cpus);
}
+ uffd_test_ctx_clear();
return 0;
}
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 2709a34a39c5..cce90a10515a 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -23,6 +23,9 @@
#define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \
MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
+#define ALIGN_UP(x, align_to) \
+ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1)))
+
struct mem_type {
const char *name;
unsigned int mem_flag;
@@ -78,6 +81,7 @@ typedef struct {
uffd_test_fn uffd_fn;
unsigned int mem_targets;
uint64_t uffd_feature_required;
+ uffd_test_case_ops_t *test_case_ops;
} uffd_test_case_t;
static void uffd_test_report(void)
@@ -185,6 +189,7 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
{
map_shared = mem_type->shared;
uffd_test_ops = mem_type->mem_ops;
+ uffd_test_case_ops = test->test_case_ops;
if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
page_size = default_huge_page_size();
@@ -1062,6 +1067,188 @@ static void uffd_poison_test(uffd_test_args_t *targs)
uffd_test_pass();
}
+static void
+uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args,
+ unsigned long len)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags &
+ (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE))
+ err("unexpected fault type %llu", msg->arg.pagefault.flags);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(len-1);
+
+ if (move_page(uffd, offset, len))
+ args->missing_faults++;
+}
+
+static void uffd_move_handle_fault(struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uffd_move_handle_fault_common(msg, args, page_size);
+}
+
+static void uffd_move_pmd_handle_fault(struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uffd_move_handle_fault_common(msg, args, read_pmd_pagesize());
+}
+
+static void
+uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
+ void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args))
+{
+ unsigned long nr;
+ pthread_t uffd_mon;
+ char c;
+ unsigned long long count;
+ struct uffd_args args = { 0 };
+ char *orig_area_src, *orig_area_dst;
+ unsigned long step_size, step_count;
+ unsigned long src_offs = 0;
+ unsigned long dst_offs = 0;
+
+ /* Prevent source pages from being mapped more than once */
+ if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK))
+ err("madvise(MADV_DONTFORK) failure");
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ true, false, false))
+ err("register failure");
+
+ args.handle_fault = handle_fault;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ step_size = chunk_size / page_size;
+ step_count = nr_pages / step_size;
+
+ if (chunk_size > page_size) {
+ char *aligned_src = ALIGN_UP(area_src, chunk_size);
+ char *aligned_dst = ALIGN_UP(area_dst, chunk_size);
+
+ if (aligned_src != area_src || aligned_dst != area_dst) {
+ src_offs = (aligned_src - area_src) / page_size;
+ dst_offs = (aligned_dst - area_dst) / page_size;
+ step_count--;
+ }
+ orig_area_src = area_src;
+ orig_area_dst = area_dst;
+ area_src = aligned_src;
+ area_dst = aligned_dst;
+ }
+
+ /*
+ * Read each of the pages back using the UFFD-registered mapping. We
+ * expect that the first time we touch a page, it will result in a missing
+ * fault. uffd_poll_thread will resolve the fault by moving source
+ * page to destination.
+ */
+ for (nr = 0; nr < step_count * step_size; nr += step_size) {
+ unsigned long i;
+
+ /* Check area_src content */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(area_src, nr + i);
+ if (count != count_verify[src_offs + nr + i])
+ err("nr %lu source memory invalid %llu %llu\n",
+ nr + i, count, count_verify[src_offs + nr + i]);
+ }
+
+ /* Faulting into area_dst should move the page or the huge page */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(area_dst, nr + i);
+ if (count != count_verify[dst_offs + nr + i])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[dst_offs + nr + i]);
+ }
+
+ /* Re-check area_src content which should be empty */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(area_src, nr + i);
+ if (count != 0)
+ err("nr %lu move failed %llu %llu\n",
+ nr, count, count_verify[src_offs + nr + i]);
+ }
+ }
+ if (step_size > page_size) {
+ area_src = orig_area_src;
+ area_dst = orig_area_dst;
+ }
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("join() failed");
+
+ if (args.missing_faults != step_count || args.minor_faults != 0)
+ uffd_test_fail("stats check error");
+ else
+ uffd_test_pass();
+}
+
+static void uffd_move_test(uffd_test_args_t *targs)
+{
+ uffd_move_test_common(targs, page_size, uffd_move_handle_fault);
+}
+
+static void uffd_move_pmd_test(uffd_test_args_t *targs)
+{
+ if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE))
+ err("madvise(MADV_HUGEPAGE) failure");
+ uffd_move_test_common(targs, read_pmd_pagesize(),
+ uffd_move_pmd_handle_fault);
+}
+
+static void uffd_move_pmd_split_test(uffd_test_args_t *targs)
+{
+ if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE))
+ err("madvise(MADV_NOHUGEPAGE) failure");
+ uffd_move_test_common(targs, read_pmd_pagesize(),
+ uffd_move_pmd_handle_fault);
+}
+
+static int prevent_hugepages(const char **errmsg)
+{
+ /* This should be done before source area is populated */
+ if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) {
+ /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */
+ if (errno != EINVAL) {
+ if (errmsg)
+ *errmsg = "madvise(MADV_NOHUGEPAGE) failed";
+ return -errno;
+ }
+ }
+ return 0;
+}
+
+static int request_hugepages(const char **errmsg)
+{
+ /* This should be done before source area is populated */
+ if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) {
+ if (errmsg) {
+ *errmsg = (errno == EINVAL) ?
+ "CONFIG_TRANSPARENT_HUGEPAGE is not set" :
+ "madvise(MADV_HUGEPAGE) failed";
+ }
+ return -errno;
+ }
+ return 0;
+}
+
+struct uffd_test_case_ops uffd_move_test_case_ops = {
+ .post_alloc = prevent_hugepages,
+};
+
+struct uffd_test_case_ops uffd_move_test_pmd_case_ops = {
+ .post_alloc = request_hugepages,
+};
+
/*
* Test the returned uffdio_register.ioctls with different register modes.
* Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
@@ -1140,6 +1327,27 @@ uffd_test_case_t uffd_tests[] = {
.uffd_feature_required = 0,
},
{
+ .name = "move",
+ .uffd_fn = uffd_move_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_case_ops,
+ },
+ {
+ .name = "move-pmd",
+ .uffd_fn = uffd_move_pmd_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_pmd_case_ops,
+ },
+ {
+ .name = "move-pmd-split",
+ .uffd_fn = uffd_move_pmd_split_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_pmd_case_ops,
+ },
+ {
.name = "wp-fork",
.uffd_fn = uffd_wp_fork_test,
.mem_targets = MEM_ALL,
@@ -1319,6 +1527,7 @@ int main(int argc, char *argv[])
continue;
}
test->uffd_fn(&args);
+ uffd_test_ctx_clear();
}
}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 3082b40492dd..05736c615734 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -4,6 +4,7 @@
#include <dirent.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
+#include <linux/fs.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "../kselftest.h"
@@ -28,19 +29,92 @@ uint64_t pagemap_get_entry(int fd, char *start)
return entry;
}
+static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
+{
+ struct pm_scan_arg arg;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + psize());
+ arg.vec = (uintptr_t)r;
+ arg.vec_len = 1;
+ arg.flags = 0;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = 0;
+ arg.category_inverted = 0;
+ arg.category_mask = 0;
+ arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
+ arg.return_mask = arg.category_anyof_mask;
+
+ return ioctl(fd, PAGEMAP_SCAN, &arg);
+}
+
+static uint64_t pagemap_scan_get_categories(int fd, char *start)
+{
+ struct page_region r;
+ long ret;
+
+ ret = __pagemap_scan_get_categories(fd, start, &r);
+ if (ret < 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
+ if (ret == 0)
+ return 0;
+ return r.categories;
+}
+
+/* `start` is any valid address. */
+static bool pagemap_scan_supported(int fd, char *start)
+{
+ static int supported = -1;
+ int ret;
+
+ if (supported != -1)
+ return supported;
+
+ /* Provide an invalid address in order to trigger EFAULT. */
+ ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL);
+ if (ret == 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");
+
+ supported = errno == EFAULT;
+
+ return supported;
+}
+
+static bool page_entry_is(int fd, char *start, char *desc,
+ uint64_t pagemap_flags, uint64_t pagescan_flags)
+{
+ bool m = pagemap_get_entry(fd, start) & pagemap_flags;
+
+ if (pagemap_scan_supported(fd, start)) {
+ bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
+
+ if (m == s)
+ return m;
+
+ ksft_exit_fail_msg(
+ "read and ioctl return unmatched results for %s: %d %d", desc, m, s);
+ }
+ return m;
+}
+
bool pagemap_is_softdirty(int fd, char *start)
{
- return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY;
+ return page_entry_is(fd, start, "soft-dirty",
+ PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
}
bool pagemap_is_swapped(int fd, char *start)
{
- return pagemap_get_entry(fd, start) & PM_SWAP;
+ return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
}
bool pagemap_is_populated(int fd, char *start)
{
- return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP);
+ return page_entry_is(fd, start, "populated",
+ PM_PRESENT | PM_SWAP,
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
}
unsigned long pagemap_get_pfn(int fd, char *start)