diff options
Diffstat (limited to 'tools/testing/selftests/mm')
66 files changed, 9544 insertions, 1286 deletions
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index d26e962f2ac4..824266982aa3 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -6,6 +6,7 @@ hugepage-shm hugepage-vmemmap hugetlb-madvise hugetlb-read-hwpoison +hugetlb-soft-offline khugepaged map_hugetlb map_populate @@ -19,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 @@ -26,6 +28,7 @@ protection_keys_64 madv_populate uffd-stress uffd-unit-tests +uffd-wp-mremap mlock-intersect-test mlock-random-test virtual_address_range @@ -35,6 +38,9 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 soft-dirty split_huge_page_test ksm_tests @@ -47,3 +53,10 @@ mkdirty va_high_addr_switch hugetlb_fault_after_madv hugetlb_madv_vs_map +mseal_test +droppable +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 +guard-regions +merge diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index eb5f39a2668b..ae6f994d3add 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -2,6 +2,7 @@ # Makefile for mm selftests LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h +LOCAL_HDRS += $(selfdir)/mm/mseal_helpers.h include local_config.mk @@ -12,7 +13,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) else uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+') endif -ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') +ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/powerpc/') endif # Without this, failed build products remain, with up-to-date timestamps, @@ -32,9 +33,27 @@ endif # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) +CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is +# automatically enabled at -O1 or above. This triggers various unused-result +# warnings where functions such as read() or write() are called and their +# return value is not checked. Disable _FORTIFY_SOURCE to silence those +# warnings. +CFLAGS += -U_FORTIFY_SOURCE + +KDIR ?= /lib/modules/$(shell uname -r)/build +ifneq (,$(wildcard $(KDIR)/Module.symvers)) +ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h)) +TEST_GEN_MODS_DIR := page_frag +else +PAGE_FRAG_WARNING = "missing page_frag_cache.h, please use a newer kernel" +endif +else +PAGE_FRAG_WARNING = "missing Module.symvers, please have the kernel built first" +endif + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm @@ -42,6 +61,7 @@ TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugetlb-madvise TEST_GEN_FILES += hugetlb-read-hwpoison +TEST_GEN_FILES += hugetlb-soft-offline TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm @@ -51,7 +71,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test @@ -59,18 +81,25 @@ TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test +TEST_GEN_FILES += mseal_test TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress TEST_GEN_FILES += uffd-unit-tests +TEST_GEN_FILES += uffd-wp-mremap TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests TEST_GEN_FILES += ksm_functional_tests TEST_GEN_FILES += mdwe_test TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map +TEST_GEN_FILES += hugetlb_dio +TEST_GEN_FILES += droppable +TEST_GEN_FILES += guard-regions +TEST_GEN_FILES += merge ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -82,6 +111,7 @@ CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_pr CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) VMTARGETS := protection_keys +VMTARGETS += pkey_sighandler_tests BINARIES_32 := $(VMTARGETS:%=%_32) BINARIES_64 := $(VMTARGETS:%=%_64) @@ -96,17 +126,19 @@ endif ifeq ($(CAN_BUILD_X86_64),1) TEST_GEN_FILES += $(BINARIES_64) endif -else -ifneq (,$(findstring $(ARCH),ppc64)) +else ifeq ($(ARCH),arm64) +TEST_GEN_FILES += protection_keys +TEST_GEN_FILES += pkey_sighandler_tests +else ifeq ($(ARCH),powerpc) TEST_GEN_FILES += protection_keys endif -endif - -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch +ifneq ($(ARCH),riscv64) TEST_GEN_FILES += virtual_address_range +endif TEST_GEN_FILES += write_to_hugetlbfs endif @@ -117,6 +149,7 @@ TEST_FILES += test_hmm.sh TEST_FILES += va_high_addr_switch.sh TEST_FILES += charge_reserved_hugetlb.sh TEST_FILES += hugetlb_reparenting_test.sh +TEST_FILES += test_page_frag.sh # required by charge_reserved_hugetlb.sh TEST_FILES += write_hugetlb_memory.sh @@ -128,11 +161,16 @@ $(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c +$(OUTPUT)/uffd-wp-mremap: uffd-common.c +$(OUTPUT)/protection_keys: pkey_util.c +$(OUTPUT)/pkey_sighandler_tests: pkey_util.c ifeq ($(ARCH),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) +$(BINARIES_32) $(BINARIES_64): pkey_util.c + define gen-target-rule-32 $(1) $(1)_32: $(OUTPUT)/$(1)_32 .PHONY: $(1) $(1)_32 @@ -202,3 +240,12 @@ warn_missing_liburing: echo "Warning: missing liburing support. Some tests will be skipped." ; \ echo endif + +ifneq ($(PAGE_FRAG_WARNING),) +all: warn_missing_page_frag + +warn_missing_page_frag: + @echo ; \ + echo "Warning: $(PAGE_FRAG_WARNING). page_frag test will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index d680c00d2853..e1fe16bcbbe8 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi @@ -254,7 +254,7 @@ function cleanup_hugetlb_memory() { local cgroup="$1" if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then echo killing write_to_hugetlbfs - killall -2 write_to_hugetlbfs + killall -2 --wait write_to_hugetlbfs wait_for_hugetlb_memory_to_get_depleted $cgroup fi set -e diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 533999b6c284..9bc4591c7b16 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -82,12 +82,19 @@ int prereq(void) return -1; } -int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +int check_compaction(unsigned long mem_free, unsigned long hugepage_size, + unsigned long initial_nr_hugepages) { + unsigned long nr_hugepages_ul; int fd, ret = -1; int compaction_index = 0; - char initial_nr_hugepages[10] = {0}; - char nr_hugepages[10] = {0}; + char nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; + + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ @@ -101,26 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) goto out; } - if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { - ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); - goto close_fd; - } - - /* Start with the initial condition of 0 huge pages*/ - if (write(fd, "0", sizeof(char)) != sizeof(char)) { - ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); - goto close_fd; - } - - lseek(fd, 0, SEEK_SET); - - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } @@ -134,22 +133,27 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) /* We should have been able to request at least 1/3 rd of the memory in huge pages */ - compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); + nr_hugepages_ul = strtoul(nr_hugepages, NULL, 10); + if (!nr_hugepages_ul) { + ksft_print_msg("ERROR: No memory is available as huge pages\n"); + goto close_fd; + } + compaction_index = mem_free/(nr_hugepages_ul * hugepage_size); lseek(fd, 0, SEEK_SET); - if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) - != strlen(initial_nr_hugepages)) { + if (write(fd, init_nr_hugepages, strlen(init_nr_hugepages)) + != strlen(init_nr_hugepages)) { ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n", strerror(errno)); goto close_fd; } - ksft_print_msg("Number of huge pages allocated = %d\n", - atoi(nr_hugepages)); + ksft_print_msg("Number of huge pages allocated = %lu\n", + nr_hugepages_ul); if (compaction_index > 3) { - ksft_print_msg("ERROR: Less that 1/%d of memory is available\n" + ksft_print_msg("ERROR: Less than 1/%d of memory is available\n" "as huge pages\n", compaction_index); goto close_fd; } @@ -163,6 +167,41 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) return ret; } +int set_zero_hugepages(unsigned long *initial_nr_hugepages) +{ + int fd, ret = -1; + char nr_hugepages[20] = {0}; + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { + ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); + goto out; + } + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { + ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + /* Start with the initial condition of 0 huge pages */ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { + ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); + goto close_fd; + } + + *initial_nr_hugepages = strtoul(nr_hugepages, NULL, 10); + ret = 0; + + close_fd: + close(fd); + + out: + return ret; +} int main(int argc, char **argv) { @@ -173,14 +212,19 @@ int main(int argc, char **argv) unsigned long mem_free = 0; unsigned long hugepage_size = 0; long mem_fragmentable_MB = 0; + unsigned long initial_nr_hugepages; ksft_print_header(); if (prereq() || geteuid()) - return ksft_exit_skip("Prerequisites unsatisfied\n"); + ksft_exit_skip("Prerequisites unsatisfied\n"); ksft_set_plan(1); + /* Start the test without hugepages reducing mem_free */ + if (set_zero_hugepages(&initial_nr_hugepages)) + ksft_exit_fail(); + lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; if (setrlimit(RLIMIT_MEMLOCK, &lim)) @@ -224,8 +268,9 @@ int main(int argc, char **argv) entry = entry->next; } - if (check_compaction(mem_free, hugepage_size) == 0) - return ksft_exit_pass(); + if (check_compaction(mem_free, hugepage_size, + initial_nr_hugepages) == 0) + ksft_exit_pass(); - return ksft_exit_fail(); + ksft_exit_fail(); } diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index 4309916f629e..deba93379c80 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -7,3 +7,7 @@ CONFIG_TEST_HMM=m CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y +CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 363bf5f801be..dbbcc5eb3dce 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -112,9 +112,12 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { - if (pipe(comm_pipes->child_ready) < 0) + if (pipe(comm_pipes->child_ready) < 0) { + ksft_perror("pipe()"); return -errno; + } if (pipe(comm_pipes->parent_ready) < 0) { + ksft_perror("pipe()"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -199,7 +202,7 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size, typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, - child_fn fn) + child_fn fn, bool xfail) { struct comm_pipes comm_pipes; char buf; @@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { exit(fn(mem, size, &comm_pipes)); @@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * write-faults by directly mapping pages writable. */ ret = mprotect(mem, size, PROT_READ); - ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + + ret = mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -247,39 +260,53 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) { + log_test_result(KSFT_PASS); + } else if (xfail) { + /* + * With hugetlb, some vmsplice() tests are currently expected to + * fail because (a) harder to fix and (b) nobody really cares. + * Flag them as expected failure for now. + */ + log_test_result(KSFT_XFAIL); + } else { + log_test_result(KSFT_FAIL); + } close_comm_pipes: close_comm_pipes(&comm_pipes); } -static void test_cow_in_parent(char *mem, size_t size) +static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) { - do_test_cow_in_parent(mem, size, false, child_memcmp_fn); + do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); } -static void test_cow_in_parent_mprotect(char *mem, size_t size) +static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) { - do_test_cow_in_parent(mem, size, true, child_memcmp_fn); + do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); } -static void test_vmsplice_in_child(char *mem, size_t size) +static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) { - do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn); + do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, + is_hugetlb); } -static void test_vmsplice_in_child_mprotect(char *mem, size_t size) +static void test_vmsplice_in_child_mprotect(char *mem, size_t size, + bool is_hugetlb) { - do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn); + do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, + is_hugetlb); } static void do_test_vmsplice_in_parent(char *mem, size_t size, - bool before_fork) + bool before_fork, bool xfail) { struct iovec iov = { .iov_base = mem, .iov_len = size, }; - ssize_t cur, total, transferred; + ssize_t cur, total, transferred = 0; struct comm_pipes comm_pipes; char *old, *new; int ret, fds[2]; @@ -292,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free; } if (pipe(fds) < 0) { - ksft_test_result_fail("pipe() failed\n"); + ksft_perror("pipe() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_print_msg("vmsplice() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -325,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (!before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_perror("vmsplice() failed"); + log_test_result(KSFT_FAIL); wait(&ret); goto close_pipe; } @@ -334,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, while (read(comm_pipes.child_ready[0], &buf, 1) != 1) ; if (munmap(mem, size) < 0) { - ksft_test_result_fail("munmap() failed\n"); + ksft_perror("munmap() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } write(comm_pipes.parent_ready[1], "0", 1); @@ -342,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, /* Wait until the child is done writing. */ wait(&ret); if (!WIFEXITED(ret)) { - ksft_test_result_fail("wait() failed\n"); + ksft_perror("wait() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } @@ -350,13 +383,24 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, for (total = 0; total < transferred; total += cur) { cur = read(fds[0], new + total, transferred - total); if (cur < 0) { - ksft_test_result_fail("read() failed\n"); + ksft_perror("read() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } } - ksft_test_result(!memcmp(old, new, transferred), - "No leak from child into parent\n"); + if (!memcmp(old, new, transferred)) { + log_test_result(KSFT_PASS); + } else if (xfail) { + /* + * With hugetlb, some vmsplice() tests are currently expected to + * fail because (a) harder to fix and (b) nobody really cares. + * Flag them as expected failure for now. + */ + log_test_result(KSFT_XFAIL); + } else { + log_test_result(KSFT_FAIL); + } close_pipe: close(fds[0]); close(fds[1]); @@ -367,14 +411,14 @@ free: free(new); } -static void test_vmsplice_before_fork(char *mem, size_t size) +static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) { - do_test_vmsplice_in_parent(mem, size, true); + do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); } -static void test_vmsplice_after_fork(char *mem, size_t size) +static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) { - do_test_vmsplice_in_parent(mem, size, false); + do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -392,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } fd = fileno(file); @@ -406,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); goto close_file; } /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed\n"); + log_test_result(KSFT_SKIP); goto free_tmp; } @@ -428,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) iov.iov_len = size; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) { - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed\n"); + log_test_result(KSFT_SKIP); goto queue_exit; } @@ -439,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -459,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) * if the page is mapped R/O vs. R/W). */ ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + goto unregister_buffers; + } + clear_softdirty(); - ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + ret = mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } } @@ -474,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) memset(mem, 0xff, size); sqe = io_uring_get_sqe(&ring); if (!sqe) { - ksft_test_result_fail("io_uring_get_sqe() failed\n"); + ksft_print_msg("io_uring_get_sqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); ret = io_uring_submit(&ring); if (ret < 0) { - ksft_test_result_fail("io_uring_submit() failed\n"); + ksft_print_msg("io_uring_submit() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { - ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + ksft_print_msg("io_uring_wait_cqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } if (cqe->res != size) { - ksft_test_result_fail("write_fixed failed\n"); + ksft_print_msg("write_fixed failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_cqe_seen(&ring, cqe); @@ -502,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_test_result_fail("pread() failed\n"); + ksft_print_msg("pread() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } total += cur; } /* Finally, check if we read what we expected. */ - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/W pin is reliable\n"); + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); quit_child: if (use_fork) { @@ -529,12 +592,12 @@ close_comm_pipes: close_comm_pipes(&comm_pipes); } -static void test_iouring_ro(char *mem, size_t size) +static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) { do_test_iouring(mem, size, false); } -static void test_iouring_fork(char *mem, size_t size) +static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) { do_test_iouring(mem, size, true); } @@ -558,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, int ret; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + log_test_result(KSFT_SKIP); return; } tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free_tmp; } @@ -585,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -622,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, clear_softdirty(); ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -637,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret) { if (errno == EINVAL) - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_SKIP; else - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_FAIL; + ksft_perror("PIN_LONGTERM_TEST_START failed"); + log_test_result(ret); goto wait; } @@ -652,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ tmp_val = (__u64)(uintptr_t)tmp; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); - if (ret) - ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); - else - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/O pin is reliable\n"); + if (ret) { + ksft_perror("PIN_LONGTERM_TEST_READ failed"); + log_test_result(KSFT_FAIL); + } else { + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); + } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); if (ret) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_perror("PIN_LONGTERM_TEST_STOP failed"); wait: switch (test) { case RO_PIN_TEST_SHARED: write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); if (!WIFEXITED(ret)) - ksft_print_msg("[INFO] wait() failed\n"); + ksft_perror("wait() failed"); break; default: break; @@ -678,37 +751,41 @@ free_tmp: free(tmp); } -static void test_ro_pin_on_shared(char *mem, size_t size) +static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); } -static void test_ro_fast_pin_on_shared(char *mem, size_t size) +static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); } -static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) +static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); } -static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) +static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); } -static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) +static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); } -static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) +static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); } -typedef void (*test_fn)(char *mem, size_t size); +typedef void (*test_fn)(char *mem, size_t size, bool hugetlb); static void do_run_with_base_page(test_fn fn, bool swapout) { @@ -718,42 +795,45 @@ static void do_run_with_base_page(test_fn fn, bool swapout) mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); /* Ignore if not around on a kernel. */ if (ret && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_perror("MADV_NOHUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } /* Populate a base page. */ - memset(mem, 0, pagesize); + memset(mem, 1, pagesize); if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); if (!pagemap_is_swapped(pagemap_fd, mem)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } } - fn(mem, pagesize); + fn(mem, pagesize, false); munmap: munmap(mem, pagesize); } static void run_with_base_page(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with base page\n", desc); + log_test_start("%s ... with base page", desc); do_run_with_base_page(fn, false); } static void run_with_base_page_swap(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + log_test_start("%s ... with swapped out base page", desc); do_run_with_base_page(fn, true); } @@ -779,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } @@ -788,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) ret = madvise(mem, thpsize, MADV_HUGEPAGE); if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + ksft_perror("MADV_HUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -796,12 +878,13 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) * Try to populate a THP. Touch the first sub-page and test if * we get the last sub-page populated automatically. */ - mem[0] = 0; + mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { - ksft_test_result_skip("Did not get a THP populated\n"); + ksft_print_msg("Did not get a THP populated\n"); + log_test_result(KSFT_SKIP); goto munmap; } - memset(mem, 0, thpsize); + memset(mem, 1, thpsize); size = thpsize; switch (thp_run) { @@ -818,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -835,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); if (ret) { - ksft_test_result_fail("MADV_DONTNEED failed\n"); + ksft_perror("MADV_DONTNEED failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = pagesize; @@ -848,14 +934,16 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (mremap_mem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } tmp = mremap(mem + mremap_size, mremap_size, mremap_size, MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); if (tmp != mremap_mem) { - ksft_test_result_fail("mremap() failed\n"); + ksft_perror("mremap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = mremap_size; @@ -868,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto munmap; } else if (!ret) { exit(0); @@ -882,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) /* Allow for sharing all pages again. */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -896,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) case THP_RUN_SINGLE_PTE_SWAPOUT: madvise(mem, size, MADV_PAGEOUT); if (!range_is_swapped(mem, size)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } break; @@ -904,7 +996,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) break; } - fn(mem, size); + fn(mem, size, false); munmap: munmap(mmap_mem, mmap_size); if (mremap_mem != MAP_FAILED) @@ -913,56 +1005,56 @@ munmap: static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + log_test_start("%s ... with THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD, size); } static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + log_test_start("%s ... with swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE, size); } static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + log_test_start("%s ... with single PTE of THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + log_test_start("%s ... with partially shared THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } @@ -972,19 +1064,20 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; char *mem, *dummy; - ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); return; } /* Populate an huge page. */ - memset(mem, 0, hugetlbsize); + memset(mem, 1, hugetlbsize); /* * We need a total of two hugetlb pages to handle COW/unsharing @@ -992,12 +1085,13 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) */ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (dummy == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto munmap; } munmap(dummy, hugetlbsize); - fn(mem, hugetlbsize); + fn(mem, hugetlbsize, true); munmap: munmap(mem, hugetlbsize); } @@ -1036,7 +1130,7 @@ static const struct test_case anon_test_cases[] = { */ { "vmsplice() + unmap in child", - test_vmsplice_in_child + test_vmsplice_in_child, }, /* * vmsplice() test, but do an additional mprotect(PROT_READ)+ @@ -1044,7 +1138,7 @@ static const struct test_case anon_test_cases[] = { */ { "vmsplice() + unmap in child with mprotect() optimization", - test_vmsplice_in_child_mprotect + test_vmsplice_in_child_mprotect, }, /* * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after @@ -1198,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } @@ -1208,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } @@ -1222,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before actually COW-sharing the page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); goto close_comm_pipes; } break; @@ -1234,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the upper part of the THP. */ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1242,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the lower part of the THP. */ ret = madvise(mem, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1252,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { switch (test) { @@ -1286,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = madvise(mem, size, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1296,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before anyone modified the COW-shared page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1317,28 +1417,39 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); close_comm_pipes: close_comm_pipes(&comm_pipes); } -static void test_anon_thp_collapse_unshared(char *mem, size_t size) +static void test_anon_thp_collapse_unshared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); } -static void test_anon_thp_collapse_fully_shared(char *mem, size_t size) +static void test_anon_thp_collapse_fully_shared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); } -static void test_anon_thp_collapse_lower_shared(char *mem, size_t size) +static void test_anon_thp_collapse_lower_shared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); } -static void test_anon_thp_collapse_upper_shared(char *mem, size_t size) +static void test_anon_thp_collapse_upper_shared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); } @@ -1394,7 +1505,7 @@ static void run_anon_thp_test_cases(void) for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { struct test_case const *test_case = &anon_thp_test_cases[i]; - ksft_print_msg("[RUN] %s\n", test_case->desc); + log_test_start("%s", test_case->desc); do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } @@ -1417,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - ksft_test_result(!memcmp(smem, old, size), - "Other mapping not modified\n"); + if (!memcmp(smem, old, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); free(old); } @@ -1436,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { char *mem, *smem, tmp; - ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + log_test_start("%s ... with shared zeropage", desc); mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1468,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) size_t mmap_size; int ret; - ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + log_test_start("%s ... with huge zeropage", desc); if (!has_huge_zeropage) { - ksft_test_result_skip("Huge zeropage not enabled\n"); + ksft_print_msg("Huge zeropage not enabled\n"); + log_test_result(KSFT_SKIP); return; } @@ -1480,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } mmap_smem = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1495,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); + goto munmap; + } ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1526,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_FAIL); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1571,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_skip("fileno() failed\n"); + ksft_perror("fileno() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1623,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, hugetlbsize)) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } @@ -1644,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1660,7 +1797,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, fn(mem, smem, hugetlbsize); munmap: munmap(mem, hugetlbsize); - if (mem != MAP_FAILED) + if (smem != MAP_FAILED) munmap(smem, hugetlbsize); close: close(fd); @@ -1735,7 +1872,6 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { - int err; struct thp_settings default_settings; ksft_print_header(); @@ -1775,9 +1911,5 @@ int main(int argc, char **argv) thp_restore_settings(); } - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - return ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c new file mode 100644 index 000000000000..f3d9ecf96890 --- /dev/null +++ b/tools/testing/selftests/mm/droppable.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <sys/mman.h> +#include <linux/mman.h> + +#include "../kselftest.h" + +int main(int argc, char *argv[]) +{ + size_t alloc_size = 134217728; + size_t page_size = getpagesize(); + void *alloc; + pid_t child; + + ksft_print_header(); + ksft_set_plan(1); + + alloc = mmap(0, alloc_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0); + assert(alloc != MAP_FAILED); + memset(alloc, 'A', alloc_size); + for (size_t i = 0; i < alloc_size; i += page_size) + assert(*(uint8_t *)(alloc + i)); + + child = fork(); + assert(child >= 0); + if (!child) { + for (;;) + *(char *)malloc(page_size) = 'B'; + } + + for (bool done = false; !done;) { + for (size_t i = 0; i < alloc_size; i += page_size) { + if (!*(uint8_t *)(alloc + i)) { + done = true; + break; + } + } + } + kill(child, SIGTERM); + + ksft_test_result_pass("MAP_DROPPABLE: PASS\n"); + exit(KSFT_PASS); +} diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c new file mode 100644 index 000000000000..93af3d3760f9 --- /dev/null +++ b/tools/testing/selftests/mm/guard-regions.c @@ -0,0 +1,2148 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <asm-generic/mman.h> /* Force the import of the tools version. */ +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/limits.h> +#include <linux/userfaultfd.h> +#include <linux/fs.h> +#include <setjmp.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <unistd.h> +#include "vm_util.h" + +#include "../pidfd/pidfd.h" + +/* + * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: + * + * "If the signal occurs other than as the result of calling the abort or raise + * function, the behavior is undefined if the signal handler refers to any + * object with static storage duration other than by assigning a value to an + * object declared as volatile sig_atomic_t" + */ +static volatile sig_atomic_t signal_jump_set; +static sigjmp_buf signal_jmp_buf; + +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + +/* + * How is the test backing the mapping being tested? + */ +enum backing_type { + ANON_BACKED, + SHMEM_BACKED, + LOCAL_FILE_BACKED, +}; + +FIXTURE(guard_regions) +{ + unsigned long page_size; + char path[PATH_MAX]; + int fd; +}; + +FIXTURE_VARIANT(guard_regions) +{ + enum backing_type backing; +}; + +FIXTURE_VARIANT_ADD(guard_regions, anon) +{ + .backing = ANON_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, shmem) +{ + .backing = SHMEM_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, file) +{ + .backing = LOCAL_FILE_BACKED, +}; + +static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant) +{ + switch (variant->backing) { + case ANON_BACKED: + case SHMEM_BACKED: + return true; + default: + return false; + } +} + +static void *mmap_(FIXTURE_DATA(guard_regions) * self, + const FIXTURE_VARIANT(guard_regions) * variant, + void *addr, size_t length, int prot, int extra_flags, + off_t offset) +{ + int fd; + int flags = extra_flags; + + switch (variant->backing) { + case ANON_BACKED: + flags |= MAP_PRIVATE | MAP_ANON; + fd = -1; + break; + case SHMEM_BACKED: + case LOCAL_FILE_BACKED: + flags |= MAP_SHARED; + fd = self->fd; + break; + default: + ksft_exit_fail(); + break; + } + + return mmap(addr, length, prot, flags, fd, offset); +} + +static int userfaultfd(int flags) +{ + return syscall(SYS_userfaultfd, flags); +} + +static void handle_fatal(int c) +{ + if (!signal_jump_set) + return; + + siglongjmp(signal_jmp_buf, c); +} + +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, + size_t n, int advice, unsigned int flags) +{ + return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags); +} + +/* + * Enable our signal catcher and try to read/write the specified buffer. The + * return value indicates whether the read/write succeeds without a fatal + * signal. + */ +static bool try_access_buf(char *ptr, bool write) +{ + bool failed; + + /* Tell signal handler to jump back here on fatal signal. */ + signal_jump_set = true; + /* If a fatal signal arose, we will jump back here and failed is set. */ + failed = sigsetjmp(signal_jmp_buf, 0) != 0; + + if (!failed) { + if (write) + *ptr = 'x'; + else + FORCE_READ(ptr); + } + + signal_jump_set = false; + return !failed; +} + +/* Try and read from a buffer, return true if no fatal signal. */ +static bool try_read_buf(char *ptr) +{ + return try_access_buf(ptr, false); +} + +/* Try and write to a buffer, return true if no fatal signal. */ +static bool try_write_buf(char *ptr) +{ + return try_access_buf(ptr, true); +} + +/* + * Try and BOTH read from AND write to a buffer, return true if BOTH operations + * succeed. + */ +static bool try_read_write_buf(char *ptr) +{ + return try_read_buf(ptr) && try_write_buf(ptr); +} + +static void setup_sighandler(void) +{ + struct sigaction act = { + .sa_handler = &handle_fatal, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + if (sigaction(SIGSEGV, &act, NULL)) + ksft_exit_fail_perror("sigaction"); +} + +static void teardown_sighandler(void) +{ + struct sigaction act = { + .sa_handler = SIG_DFL, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + sigaction(SIGSEGV, &act, NULL); +} + +static int open_file(const char *prefix, char *path) +{ + int fd; + + snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix); + fd = mkstemp(path); + if (fd < 0) + ksft_exit_fail_perror("mkstemp"); + + return fd; +} + +/* Establish a varying pattern in a buffer. */ +static void set_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + char *ptr2 = &ptr[i * page_size]; + + memset(ptr2, 'a' + (i % 26), page_size); + } +} + +/* + * Check that a buffer contains the pattern set by set_pattern(), starting at a + * page offset of pgoff within the buffer. + */ +static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size, + size_t pgoff) +{ + size_t i; + + for (i = 0; i < num_pages * page_size; i++) { + size_t offset = pgoff * page_size + i; + char actual = ptr[offset]; + char expected = 'a' + ((offset / page_size) % 26); + + if (actual != expected) + return false; + } + + return true; +} + +/* Check that a buffer contains the pattern set by set_pattern(). */ +static bool check_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + return check_pattern_offset(ptr, num_pages, page_size, 0); +} + +/* Determine if a buffer contains only repetitions of a specified char. */ +static bool is_buf_eq(char *buf, size_t size, char chr) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (buf[i] != chr) + return false; + } + + return true; +} + +FIXTURE_SETUP(guard_regions) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + setup_sighandler(); + + switch (variant->backing) { + case ANON_BACKED: + return; + case LOCAL_FILE_BACKED: + self->fd = open_file("", self->path); + break; + case SHMEM_BACKED: + self->fd = memfd_create(self->path, 0); + break; + } + + /* We truncate file to at least 100 pages, tests can modify as needed. */ + ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); +}; + +FIXTURE_TEARDOWN_PARENT(guard_regions) +{ + teardown_sighandler(); + + if (variant->backing == ANON_BACKED) + return; + + if (self->fd >= 0) + close(self->fd); + + if (self->path[0] != '\0') + unlink(self->path); +} + +TEST_F(guard_regions, basic) +{ + const unsigned long NUM_PAGES = 10; + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Trivially assert we can touch the first page. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + + /* Establish that 1st page SIGSEGV's. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* Ensure we can touch everything else.*/ + for (i = 1; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Establish a guard page at the end of the mapping. */ + ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_INSTALL), 0); + + /* Check that both guard pages result in SIGSEGV. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* Remove the first guard page. */ + ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + /* Remove the last guard page. */ + ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* + * Test setting a _range_ of pages, namely the first 3. The first of + * these be faulted in, so this also tests that we can install guard + * pages over backed pages. + */ + ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure they are all guard pages. */ + for (i = 0; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Make sure the rest are not. */ + for (i = 3; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Remove guard pages. */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure we can touch everything. */ + for (i = 0; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * Now remove all guard pages, make sure we don't remove existing + * entries. + */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < NUM_PAGES * page_size; i += page_size) { + char chr = ptr[i]; + + ASSERT_EQ(chr, 'x'); + } + + ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0); +} + +/* Assert that operations applied across multiple VMAs work as expected. */ +TEST_F(guard_regions, multi_vma) +{ + const unsigned long page_size = self->page_size; + char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; + int i; + + /* Reserve a 100 page region over which we can install VMAs. */ + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* Place a VMA of 10 pages size at the start of the region. */ + ptr1 = mmap_(self, variant, ptr_region, 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + /* Place a VMA of 5 pages size 50 pages into the region. */ + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Place a VMA of 20 pages size at the end of the region. */ + ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Unmap gaps. */ + ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---] [---] [---] + */ + + /* + * Now mark the whole range as guard pages and make sure all VMAs are as + * such. + */ + + /* + * madvise() is certifiable and lets you perform operations over gaps, + * everything works, but it indicates an error and errno is set to + * -ENOMEM. Also if anything runs out of memory it is set to + * -ENOMEM. You are meant to guess which is which. + */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now remove guar pages over range and assert the opposite. */ + + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Now map incompatible VMAs in the gaps. */ + ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---][xxxx][---][xxxx][---] + * + * Where 'x' signifies VMAs that cannot be merged with those adjacent to + * them. + */ + + /* Multiple VMAs adjacent to one another should result in no error. */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0); +} + +/* + * Assert that batched operations performed using process_madvise() work as + * expected. + */ +TEST_F(guard_regions, process_madvise) +{ + const unsigned long page_size = self->page_size; + char *ptr_region, *ptr1, *ptr2, *ptr3; + ssize_t count; + struct iovec vec[6]; + + /* Reserve region to map over. */ + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* + * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we + * overwrite existing entries and test this code path against + * overwriting existing entries. + */ + ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0); + ASSERT_NE(ptr1, MAP_FAILED); + /* We want guard markers at start/end of each VMA. */ + vec[0].iov_base = ptr1; + vec[0].iov_len = page_size; + vec[1].iov_base = &ptr1[9 * page_size]; + vec[1].iov_len = page_size; + + /* 5 pages offset 50 pages into reserve region. */ + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr2, MAP_FAILED); + vec[2].iov_base = ptr2; + vec[2].iov_len = page_size; + vec[3].iov_base = &ptr2[4 * page_size]; + vec[3].iov_len = page_size; + + /* 20 pages offset 79 pages into reserve region. */ + ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr3, MAP_FAILED); + vec[4].iov_base = ptr3; + vec[4].iov_len = page_size; + vec[5].iov_base = &ptr3[19 * page_size]; + vec[5].iov_len = page_size; + + /* Free surrounding VMAs. */ + ASSERT_EQ(munmap(ptr_region, page_size), 0); + ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0); + + /* Now guard in one step. */ + count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0); + + /* OK we don't have permission to do this, skip. */ + if (count == -1 && errno == EPERM) + ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + + /* Returns the number of bytes advised. */ + ASSERT_EQ(count, 6 * page_size); + + /* Now make sure the guarding was applied. */ + + ASSERT_FALSE(try_read_write_buf(ptr1)); + ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr2)); + ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr3)); + ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Now do the same with unguard... */ + count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0); + + /* ...and everything should now succeed. */ + + ASSERT_TRUE(try_read_write_buf(ptr1)); + ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr2)); + ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr3)); + ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr1, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr2, 5 * page_size), 0); + ASSERT_EQ(munmap(ptr3, 20 * page_size), 0); +} + +/* Assert that unmapping ranges does not leave guard markers behind. */ +TEST_F(guard_regions, munmap) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new1, *ptr_new2; + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard first and last pages. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Assert that they are guarded. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size])); + + /* Unmap them. */ + ASSERT_EQ(munmap(ptr, page_size), 0); + ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); + + /* Map over them.*/ + ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED, 0); + ASSERT_NE(ptr_new1, MAP_FAILED); + ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr_new2, MAP_FAILED); + + /* Assert that they are now not guarded. */ + ASSERT_TRUE(try_read_write_buf(ptr_new1)); + ASSERT_TRUE(try_read_write_buf(ptr_new2)); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mprotect() operations have no bearing on guard markers. */ +TEST_F(guard_regions, mprotect) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the middle of the range. */ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Assert that it is indeed guarded. */ + ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size])); + + /* Now make these pages read-only. */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0); + + /* Make sure the range is still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Make sure we can guard again without issue.*/ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Make sure the range is, yet again, still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Now unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is readable. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Split and merge VMAs and make sure guard pages still behave. */ +TEST_F(guard_regions, split_merge) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + int i; + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now unmap some pages in the range so we split. */ + ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0); + + /* Make sure the remaining ranges are guarded post-split. */ + for (i = 0; i < 2; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 2; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 6; i < 8; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 9; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now map them again - the unmap will have cleared the guards. */ + ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now make sure guard pages are established. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + bool expect_true = i == 2 || i == 5 || i == 8; + + ASSERT_TRUE(expect_true ? result : !result); + } + + /* Now guard everything again. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now split the range into three. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* Make sure the whole range is guarded for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_buf(curr)); + } + + /* Now reset protection bits so we merge the whole thing. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Make sure the whole range is still guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Split range into 3 again... */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* ...and unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is remedied for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Merge them again. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Now ensure the merged range is remedied for read/write. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that MADV_DONTNEED does not remove guard markers. */ +TEST_F(guard_regions, dontneed) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Back the whole range. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Guard every other page. */ + for (i = 0; i < 10; i += 2) { + char *curr = &ptr[i * page_size]; + int res = madvise(curr, page_size, MADV_GUARD_INSTALL); + + ASSERT_EQ(res, 0); + } + + /* Indicate that we don't need any of the range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0); + + /* Check to ensure guard markers are still in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_buf(curr); + + if (i % 2 == 0) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + switch (variant->backing) { + case ANON_BACKED: + /* If anon, then we get a zero page. */ + ASSERT_EQ(*curr, '\0'); + break; + default: + /* Otherwise, we get the file data. */ + ASSERT_EQ(*curr, 'y'); + break; + } + } + + /* Now write... */ + result = try_write_buf(&ptr[i * page_size]); + + /* ...and make sure same result. */ + ASSERT_TRUE(i % 2 != 0 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mlock()'ed pages work correctly with guard markers. */ +TEST_F(guard_regions, mlock) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Lock. */ + ASSERT_EQ(mlock(ptr, 10 * page_size), 0); + + /* Now try to guard, should fail with EINVAL. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, EINVAL); + + /* OK unlock. */ + ASSERT_EQ(munlock(ptr, 10 * page_size), 0); + + /* Guard first half of range, should now succeed. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure guard works. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + if (i < 5) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + ASSERT_EQ(*curr, 'x'); + } + } + + /* + * Now lock the latter part of the range. We can't lock the guard pages, + * as this would result in the pages being populated and the guarding + * would cause this to error out. + */ + ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Now remove guard pages, we permit mlock()'d ranges to have guard + * pages removed as it is a non-destructive operation. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now check that no guard pages remain. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * - Moving a mapping alone should retain markers as they are. + */ +TEST_F(guard_regions, mremap_move) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 5 pages. */ + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guard pages are in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Map a new region we will move this range into. Doing this ensures + * that we have reserved a range to map into. + */ + ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new); + + /* Make sure the guard markers are retained. */ + ASSERT_FALSE(try_read_write_buf(ptr_new)); + ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size])); + + /* + * Clean up - we only need reference the new pointer as we overwrote the + * PROT_NONE range and moved the existing one. + */ + munmap(ptr_new, 5 * page_size); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Expanding should retain guard pages, only now in different position. The user + * will have to remove guard pages manually to fix up (they'd have to do the + * same if it were a PROT_NONE mapping). + */ +TEST_F(guard_regions, mremap_expand) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 10 pages... */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* ...But unmap the last 5 so we can ensure we can expand into them. */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now expand to 10 pages. */ + ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Reserve a region which we can move to and expand into. */ + ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now move and expand into it. */ + ptr = mremap(ptr, 10 * page_size, 20 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new); + ASSERT_EQ(ptr, ptr_new); + + /* + * Again, make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* + * A real user would have to remove guard markers, but would reasonably + * expect all characteristics of the mapping to be retained, including + * guard markers. + */ + + /* Cleanup. */ + munmap(ptr, 20 * page_size); +} +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Shrinking will result in markers that are shrunk over being removed. Again, + * if the user were using a PROT_NONE mapping they'd have to manually fix this + * up also so this is OK. + */ +TEST_F(guard_regions, mremap_shrink) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 5 pages. */ + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now shrink to 3 pages. */ + ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE); + ASSERT_NE(ptr, MAP_FAILED); + + /* We expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * As with expansion, a real user would have to remove guard pages and + * fixup. But you'd have to do similar manual things with PROT_NONE + * mappings too. + */ + + /* + * If we expand back to the original size, the end marker will, of + * course, no longer be present. + */ + ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Again, we expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + munmap(ptr, 5 * page_size); +} + +/* + * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set + * retain guard pages. + */ +TEST_F(guard_regions, fork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish guard pages in the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Assert that the guarding is in effect. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Now unguard the range.*/ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Child unguard does not impact parent page table state. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert expected behaviour after we fork populated ranges of anonymous memory + * and then guard and unguard the range. + */ +TEST_F(guard_regions, fork_cow) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + if (variant->backing != ANON_BACKED) + SKIP(return, "CoW only supported on anon mappings"); + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate range. */ + for (i = 0; i < 10 * page_size; i++) { + char chr = 'a' + (i % 26); + + ptr[i] = chr; + } + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Ensure the range is as expected. */ + for (i = 0; i < 10 * page_size; i++) { + char expected = 'a' + (i % 26); + char actual = ptr[i]; + + ASSERT_EQ(actual, expected); + } + + /* Establish guard pages across the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + /* Remove it. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * By removing the guard pages, the page tables will be + * cleared. Assert that we are looking at the zero page now. + */ + for (i = 0; i < 10 * page_size; i++) { + char actual = ptr[i]; + + ASSERT_EQ(actual, '\0'); + } + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Ensure the range is unchanged in parent anon range. */ + for (i = 0; i < 10 * page_size; i++) { + char expected = 'a' + (i % 26); + char actual = ptr[i]; + + ASSERT_EQ(actual, expected); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that forking a process with VMAs that do have VM_WIPEONFORK set + * behave as expected. + */ +TEST_F(guard_regions, fork_wipeonfork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + if (variant->backing != ANON_BACKED) + SKIP(return, "Wipe on fork only supported on anon mappings"); + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark wipe on fork. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0); + + /* Guard the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Guard will have been wiped. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + exit(0); + } + + /* Parent process. */ + + waitpid(pid, NULL, 0); + + /* Guard markers should be in effect.*/ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_FREE retains guard entries as expected. */ +TEST_F(guard_regions, lazyfree) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != ANON_BACKED) + SKIP(return, "MADV_FREE only supported on anon mappings"); + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensure guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Lazyfree range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0); + + /* This should leave the guard markers in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ +TEST_F(guard_regions, populate) +{ + const unsigned long page_size = self->page_size; + char *ptr; + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Populate read should error out... */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1); + ASSERT_EQ(errno, EFAULT); + + /* ...as should populate write. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1); + ASSERT_EQ(errno, EFAULT); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ +TEST_F(guard_regions, cold_pageout) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensured guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now mark cold. This should have no impact on guard markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* OK, now page out. This should equally, have no effect on markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that guard pages do not break userfaultd. */ +TEST_F(guard_regions, uffd) +{ + const unsigned long page_size = self->page_size; + int uffd; + char *ptr; + int i; + struct uffdio_api api = { + .api = UFFD_API, + .features = 0, + }; + struct uffdio_register reg; + struct uffdio_range range; + + if (!is_anon_backed(variant)) + SKIP(return, "uffd only works on anon backing"); + + /* Set up uffd. */ + uffd = userfaultfd(0); + if (uffd == -1) { + switch (errno) { + case EPERM: + SKIP(return, "No userfaultfd permissions, try running as root."); + break; + case ENOSYS: + SKIP(return, "userfaultfd is not supported/not enabled."); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", + strerror(errno)); + break; + } + } + + ASSERT_NE(uffd, -1); + + ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Register the range with uffd. */ + range.start = (unsigned long)ptr; + range.len = 10 * page_size; + reg.range = range; + reg.mode = UFFDIO_REGISTER_MODE_MISSING; + ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, ®), 0); + + /* Guard the range. This should not trigger the uffd. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* The guarding should behave as usual with no uffd intervention. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0); + close(uffd); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we + * aggressively read-ahead, then install guard regions and assert that it + * behaves correctly. + * + * We page out using MADV_PAGEOUT before checking guard regions so we drop page + * cache folios, meaning we maximise the possibility of some broken readahead. + */ +TEST_F(guard_regions, madvise_sequential) +{ + char *ptr; + int i; + const unsigned long page_size = self->page_size; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern of data in the file. */ + set_pattern(ptr, 10, page_size); + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Mark it as being accessed sequentially. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0); + + /* Mark every other page a guard page. */ + for (i = 0; i < 10; i += 2) { + char *ptr2 = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now page it out. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Now make sure pages are as expected. */ + for (i = 0; i < 10; i++) { + char *chrp = &ptr[i * page_size]; + + if (i % 2 == 0) { + bool result = try_read_write_buf(chrp); + + ASSERT_FALSE(result); + } else { + ASSERT_EQ(*chrp, 'a' + i); + } + } + + /* Now remove guard pages. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure all data is as expected. */ + if (!check_pattern(ptr, 10, page_size)) + ASSERT_TRUE(false); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Check that file-backed mappings implement guard regions with MAP_PRIVATE + * correctly. + */ +TEST_F(guard_regions, map_private) +{ + const unsigned long page_size = self->page_size; + char *ptr_shared, *ptr_private; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MAP_PRIVATE test specific to file-backed"); + + ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr_shared, MAP_FAILED); + + /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */ + ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0); + ASSERT_NE(ptr_private, MAP_FAILED); + + /* Set pattern in shared mapping. */ + set_pattern(ptr_shared, 10, page_size); + + /* Install guard regions in every other page in the shared mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_shared[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Install guard regions in every other page in the private mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Every odd private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from shared mapping. */ + ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Every even private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from private mapping. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Ensure patterns are intact. */ + ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size)); + ASSERT_TRUE(check_pattern(ptr_private, 10, page_size)); + + /* Now write out every other page to MAP_PRIVATE. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + memset(ptr, 'a' + i, page_size); + } + + /* + * At this point the mapping is: + * + * 0123456789 + * SPSPSPSPSP + * + * Where S = shared, P = private mappings. + */ + + /* Now mark the beginning of the mapping guarded. */ + ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* + * This renders the mapping: + * + * 0123456789 + * xxxxxPSPSP + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* Ensure guard regions as expected. */ + ASSERT_EQ(try_read_buf(ptr), i >= 5); + /* The shared mapping should always succeed. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + } + + /* Remove the guard regions altogether. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * + * We now expect the mapping to be: + * + * 0123456789 + * SSSSSPSPSP + * + * As we removed guard regions, the private pages from the first 5 will + * have been zapped, so on fault will reestablish the shared mapping. + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* + * Assert that shared mappings in the MAP_PRIVATE mapping match + * the shared mapping. + */ + if (i < 5 || i % 2 == 0) { + char *ptr_s = &ptr_shared[i * page_size]; + + ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0); + continue; + } + + /* Everything else is a private mapping. */ + ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i)); + } + + ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0); +} + +/* Test that guard regions established over a read-only mapping function correctly. */ +TEST_F(guard_regions, readonly_file) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != LOCAL_FILE_BACKED) + SKIP(return, "Read-only test specific to file-backed"); + + /* Map shared so we can populate with pattern, populate it, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + /* Close the fd so we can re-open read-only. */ + ASSERT_EQ(close(self->fd), 0); + + /* Re-open read-only. */ + self->fd = open(self->path, O_RDONLY); + ASSERT_NE(self->fd, -1); + /* Re-map read-only. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark every other page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Assert that the guard regions are in place.*/ + for (i = 0; i < 10; i++) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0); + } + + /* Remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure the data is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, fault_around) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Fault-around test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern in the backing file. */ + set_pattern(ptr, 10, page_size); + + /* + * Now drop it from the page cache so we get major faults when next we + * map it. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Unmap and remap 'to be sure'. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now fault in every odd page. This should trigger fault-around. */ + for (i = 1; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Finally, ensure that guard regions are intact as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, truncation) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Establish a pattern in the backing file, just so there is data + * there. + */ + set_pattern(ptr, 10, page_size); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to actually used size (initialised to 100). */ + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Here the guard regions will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to half the size, then truncate again to the full size. */ + ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Again, guard pages will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, hole_punch) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + /* Establish pattern in mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + + /* Install a guard region in the middle of the mapping. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_GUARD_INSTALL), 0); + + /* + * The buffer will now be: + * + * 0123456789 + * ***xxxx*** + * + * Where * is data and x is the guard region. + */ + + /* Ensure established. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now hole punch the guarded region. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_REMOVE), 0); + + /* Ensure guard regions remain. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now remove guard region throughout. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Check that the pattern exists in non-hole punched region. */ + ASSERT_TRUE(check_pattern(ptr, 3, page_size)); + /* Check that hole punched region is zeroed. */ + ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0')); + /* Check that the pattern exists in the remainder of the file. */ + ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Ensure that a memfd works correctly with guard regions, that we can write + * seal it then open the mapping read-only and still establish guard regions + * within, remove those guard regions and have everything work correctly. + */ +TEST_F(guard_regions, memfd_write_seal) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != SHMEM_BACKED) + SKIP(return, "memfd write seal test specific to shmem"); + + /* OK, we need a memfd, so close existing one. */ + ASSERT_EQ(close(self->fd), 0); + + /* Create and truncate memfd. */ + self->fd = memfd_create("guard_regions_memfd_seals_test", + MFD_ALLOW_SEALING); + ASSERT_NE(self->fd, -1); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Map, set pattern, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + + /* Write-seal the memfd. */ + ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0); + + /* Now map the memfd readonly. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Ensure write seal intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_FALSE(try_write_buf(ptr_p)); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + + +/* + * Since we are now permitted to establish guard regions in read-only anonymous + * mappings, for the sake of thoroughness, though it probably has no practical + * use, test that guard regions function with a mapping to the anonymous zero + * page. + */ +TEST_F(guard_regions, anon_zeropage) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (!is_anon_backed(variant)) + SKIP(return, "anon zero page test specific to anon/shmem"); + + /* Obtain a read-only i.e. anon zero page mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove all guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Ensure zero page...*/ + ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0')); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that /proc/$pid/pagemap correctly identifies guard region ranges. + */ +TEST_F(guard_regions, pagemap) +{ + const unsigned long page_size = self->page_size; + int proc_fd; + char *ptr; + int i; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Read from pagemap, and assert no guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, 0); + } + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that PAGEMAP_SCAN correctly reports guard region ranges. + */ +TEST_F(guard_regions, pagemap_scan) +{ + const unsigned long page_size = self->page_size; + struct page_region pm_regs[10]; + struct pm_scan_arg pm_scan_args = { + .size = sizeof(struct pm_scan_arg), + .category_anyof_mask = PAGE_IS_GUARD, + .return_mask = PAGE_IS_GUARD, + .vec = (long)&pm_regs, + .vec_len = ARRAY_SIZE(pm_regs), + }; + int proc_fd, i; + char *ptr; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + pm_scan_args.start = (long)ptr; + pm_scan_args.end = (long)ptr + 10 * page_size; + ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0); + ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* + * Assert ioctl() returns the count of located regions, where each + * region spans every other page within the range of 10 pages. + */ + ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5); + ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 5; i++) { + long ptr_p = (long)&ptr[2 * i * page_size]; + + ASSERT_EQ(pm_regs[i].start, ptr_p); + ASSERT_EQ(pm_regs[i].end, ptr_p + page_size); + ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index ad168d35b23b..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -93,40 +93,67 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) __fsword_t fs_type = get_fs_type(fd); bool should_work; char *mem; + int result = KSFT_PASS; int ret; + if (fd < 0) { + result = KSFT_FAIL; + goto report; + } + if (ftruncate(fd, size)) { - ksft_test_result_fail("ftruncate() failed\n"); + if (errno == ENOENT) { + skip_test_dodgy_fs("ftruncate()"); + } else { + ksft_print_msg("ftruncate() failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + goto report; + } return; } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) - ksft_test_result_fail("fallocate() failed\n"); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize) { + ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } mem = mmap(NULL, size, PROT_READ | PROT_WRITE, shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed\n"); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize || shared) { + ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } - /* - * Fault in the page writable such that GUP-fast can eventually pin - * it immediately. - */ + /* Fault in the page such that GUP-fast can pin it directly. */ memset(mem, 0, size); switch (type) { case TEST_TYPE_RO: case TEST_TYPE_RO_FAST: + /* + * Cover more cases regarding unsharing decisions when + * long-term R/O pinning by mapping the page R/O. + */ + ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + goto munmap; + } + /* FALLTHROUGH */ case TEST_TYPE_RW: case TEST_TYPE_RW_FAST: { struct pin_longterm_test args; @@ -136,18 +163,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) type == TEST_TYPE_RW_FAST; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + result = KSFT_SKIP; break; } if (rw && shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; return; } /* * R/O pinning or pinning in a private mapping is always * expected to work. Otherwise, we expect long-term R/W pinning - * to only succeed for special fielesystems. + * to only succeed for special filesystems. */ should_work = !shared || !rw || fs_supports_writable_longterm_pinning(fs_type); @@ -158,25 +187,35 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)n"); + result = KSFT_SKIP; break; } else if (ret && errno == EFAULT) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) + result = KSFT_FAIL; + else + result = KSFT_PASS; break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; break; } if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n", + strerror(errno)); /* * TODO: if the kernel ever supports long-term R/W pinning on * some previously unsupported filesystems, we might want to * perform some additional tests for possible data corruptions. */ - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) + result = KSFT_PASS; + else + result = KSFT_FAIL; break; } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -186,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* io_uring always pins pages writable. */ if (shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); - return; + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; + goto report; } should_work = !shared || fs_supports_writable_longterm_pinning(fs_type); @@ -195,7 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; break; } /* @@ -208,15 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) { + ksft_print_msg("Should have failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + } else { + result = KSFT_PASS; + } } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; } else { - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) { + result = KSFT_PASS; + } else { + ksft_print_msg("Should have worked\n"); + result = KSFT_FAIL; + } io_uring_unregister_buffers(&ring); } @@ -228,7 +283,10 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) assert(false); } +munmap: munmap(mem, size); +report: + log_test_result(result); } typedef void (*test_fn)(int fd, size_t size); @@ -237,11 +295,12 @@ static void run_with_memfd(test_fn fn, const char *desc) { int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); return; } @@ -254,23 +313,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); - return; - } - - fd = fileno(file); - if (fd < 0) { - ksft_test_result_fail("fileno() failed\n"); - goto close; + ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); + fd = -1; + } else { + fd = fileno(file); + if (fd < 0) { + ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); + } } fn(fd, pagesize); -close: - fclose(file); + + if (file) + fclose(file); } static void run_with_local_tmpfile(test_fn fn, const char *desc) @@ -278,22 +337,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) char filename[] = __FILE__"_tmpfile_XXXXXX"; int fd; - ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc); + log_test_start("%s ... with local tmpfile", desc); fd = mkstemp(filename); - if (fd < 0) { - ksft_test_result_fail("mkstemp() failed\n"); - return; - } + if (fd < 0) + ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); if (unlink(filename)) { - ksft_test_result_fail("unlink() failed\n"); - goto close; + ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); + close(fd); + fd = -1; } fn(fd, pagesize); -close: - close(fd); + + if (fd >= 0) + close(fd); } static void run_with_memfd_hugetlb(test_fn fn, const char *desc, @@ -302,14 +361,15 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, int flags = MFD_HUGETLB; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); return; } @@ -438,7 +498,7 @@ static int tests_per_test_case(void) int main(int argc, char **argv) { - int i, err; + int i; pagesize = getpagesize(); nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, @@ -452,9 +512,5 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_test_case(&test_cases[i]); - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - return ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index 18a49c70d4c6..bdeaac67ff9a 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -1,3 +1,4 @@ +#define __SANE_USERSPACE_TYPES__ // Use ll64 #include <fcntl.h> #include <errno.h> #include <stdio.h> @@ -228,7 +229,7 @@ int main(int argc, char **argv) break; } ksft_test_result_skip("Please run this test as root\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); @@ -267,5 +268,5 @@ int main(int argc, char **argv) free(tid); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0..141bf63cbe05 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map) buffer->fd = -1; buffer->size = size; - buffer->mirror = malloc(npages); + buffer->mirror = malloc(size); ASSERT_NE(buffer->mirror, NULL); /* Reserve a range of addresses. */ diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c index 267eea2e0e0b..3b1b532f1cbb 100644 --- a/tools/testing/selftests/mm/hugepage-mmap.c +++ b/tools/testing/selftests/mm/hugepage-mmap.c @@ -8,13 +8,6 @@ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this * example, the app is requesting memory of size 256MB that is backed by * huge pages. - * - * For the ia64 architecture, the Linux kernel reserves Region number 4 for - * huge pages. That means that if one requires a fixed address, a huge page - * aligned address starting with 0x800000... will be required. If a fixed - * address is not required, the kernel will select an address in the proper - * range. - * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ #define _GNU_SOURCE #include <stdlib.h> @@ -27,15 +20,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_SHARED | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_SHARED) -#endif - static void check_bytes(char *addr) { ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); @@ -74,7 +58,7 @@ int main(void) if (fd < 0) ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno)); - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + addr = mmap(NULL, LENGTH, PROTECTION, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { close(fd); ksft_exit_fail_msg("mmap(): %s\n", strerror(errno)); diff --git a/tools/testing/selftests/mm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c index 478bb1e989e9..ef06260802b5 100644 --- a/tools/testing/selftests/mm/hugepage-shm.c +++ b/tools/testing/selftests/mm/hugepage-shm.c @@ -8,13 +8,6 @@ * SHM_HUGETLB in the shmget system call to inform the kernel that it is * requesting huge pages. * - * For the ia64 architecture, the Linux kernel reserves Region number 4 for - * huge pages. That means that if one requires a fixed address, a huge page - * aligned address starting with 0x800000... will be required. If a fixed - * address is not required, the kernel will select an address in the proper - * range. - * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. - * * Note: The default shared memory limit is quite low on many kernels, * you may need to increase it via: * @@ -39,15 +32,6 @@ #define dprintf(x) printf(x) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define SHMAT_FLAGS (SHM_RND) -#else -#define ADDR (void *)(0x0UL) -#define SHMAT_FLAGS (0) -#endif - int main(void) { int shmid; @@ -61,7 +45,7 @@ int main(void) } printf("shmid: 0x%x\n", shmid); - shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + shmaddr = shmat(shmid, NULL, 0); if (shmaddr == (char *)-1) { perror("Shared memory attach failure"); shmctl(shmid, IPC_RMID, NULL); diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c index 894d28c3dd47..df366a4d1b92 100644 --- a/tools/testing/selftests/mm/hugepage-vmemmap.c +++ b/tools/testing/selftests/mm/hugepage-vmemmap.c @@ -22,20 +22,6 @@ #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1) -/* - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. - */ -#ifdef __ia64__ -#define MAP_ADDR (void *)(0x8000000000000000UL) -#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define MAP_ADDR NULL -#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - static size_t pagesize; static size_t maplength; @@ -113,7 +99,8 @@ int main(int argc, char **argv) exit(1); } - addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); + addr = mmap(NULL, maplength, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); exit(1); diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c new file mode 100644 index 000000000000..f086f0e04756 --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test soft offline behavior for HugeTLB pages: + * - if enable_soft_offline = 0, hugepages should stay intact and soft + * offlining failed with EOPNOTSUPP. + * - if enable_soft_offline = 1, a hugepage should be dissolved and + * nr_hugepages/free_hugepages should be reduced by 1. + * + * Before running, make sure more than 2 hugepages of default_hugepagesz + * are allocated. For example, if /proc/meminfo/Hugepagesize is 2048kB: + * echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <linux/magic.h> +#include <linux/memfd.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <sys/types.h> + +#include "../kselftest.h" + +#ifndef MADV_SOFT_OFFLINE +#define MADV_SOFT_OFFLINE 101 +#endif + +#define EPREFIX " !!! " + +static int do_soft_offline(int fd, size_t len, int expect_errno) +{ + char *filemap = NULL; + char *hwp_addr = NULL; + const unsigned long pagesize = getpagesize(); + int ret = 0; + + if (ftruncate(fd, len) < 0) { + ksft_perror(EPREFIX "ftruncate to len failed"); + return -1; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + ksft_perror(EPREFIX "mmap failed"); + ret = -1; + goto untruncate; + } + + memset(filemap, 0xab, len); + ksft_print_msg("Allocated %#lx bytes of hugetlb pages\n", len); + + hwp_addr = filemap + len / 2; + ret = madvise(hwp_addr, pagesize, MADV_SOFT_OFFLINE); + ksft_print_msg("MADV_SOFT_OFFLINE %p ret=%d, errno=%d\n", + hwp_addr, ret, errno); + if (ret != 0) + ksft_perror(EPREFIX "madvise failed"); + + if (errno == expect_errno) + ret = 0; + else { + ksft_print_msg("MADV_SOFT_OFFLINE should ret %d\n", + expect_errno); + ret = -1; + } + + munmap(filemap, len); +untruncate: + if (ftruncate(fd, 0) < 0) + ksft_perror(EPREFIX "ftruncate back to 0 failed"); + + return ret; +} + +static int set_enable_soft_offline(int value) +{ + char cmd[256] = {0}; + FILE *cmdfile = NULL; + + if (value != 0 && value != 1) + return -EINVAL; + + sprintf(cmd, "echo %d > /proc/sys/vm/enable_soft_offline", value); + cmdfile = popen(cmd, "r"); + + if (cmdfile) + ksft_print_msg("enable_soft_offline => %d\n", value); + else { + ksft_perror(EPREFIX "failed to set enable_soft_offline"); + return errno; + } + + pclose(cmdfile); + return 0; +} + +static int read_nr_hugepages(unsigned long hugepage_size, + unsigned long *nr_hugepages) +{ + char buffer[256] = {0}; + char cmd[256] = {0}; + + sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages", + hugepage_size); + FILE *cmdfile = popen(cmd, "r"); + + if (cmdfile == NULL) { + ksft_perror(EPREFIX "failed to popen nr_hugepages"); + return -1; + } + + if (!fgets(buffer, sizeof(buffer), cmdfile)) { + ksft_perror(EPREFIX "failed to read nr_hugepages"); + pclose(cmdfile); + return -1; + } + + *nr_hugepages = atoll(buffer); + pclose(cmdfile); + return 0; +} + +static int create_hugetlbfs_file(struct statfs *file_stat) +{ + int fd; + + fd = memfd_create("hugetlb_tmp", MFD_HUGETLB); + if (fd < 0) { + ksft_perror(EPREFIX "could not open hugetlbfs file"); + return -1; + } + + memset(file_stat, 0, sizeof(*file_stat)); + if (fstatfs(fd, file_stat)) { + ksft_perror(EPREFIX "fstatfs failed"); + goto close; + } + if (file_stat->f_type != HUGETLBFS_MAGIC) { + ksft_print_msg(EPREFIX "not hugetlbfs file\n"); + goto close; + } + + return fd; +close: + close(fd); + return -1; +} + +static void test_soft_offline_common(int enable_soft_offline) +{ + int fd; + int expect_errno = enable_soft_offline ? 0 : EOPNOTSUPP; + struct statfs file_stat; + unsigned long hugepagesize_kb = 0; + unsigned long nr_hugepages_before = 0; + unsigned long nr_hugepages_after = 0; + int ret; + + ksft_print_msg("Test soft-offline when enabled_soft_offline=%d\n", + enable_soft_offline); + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + ksft_exit_fail_msg("Failed to create hugetlbfs file\n"); + + hugepagesize_kb = file_stat.f_bsize / 1024; + ksft_print_msg("Hugepagesize is %ldkB\n", hugepagesize_kb); + + if (set_enable_soft_offline(enable_soft_offline) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to set enable_soft_offline\n"); + } + + if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to read nr_hugepages\n"); + } + + ksft_print_msg("Before MADV_SOFT_OFFLINE nr_hugepages=%ld\n", + nr_hugepages_before); + + ret = do_soft_offline(fd, 2 * file_stat.f_bsize, expect_errno); + + if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to read nr_hugepages\n"); + } + + ksft_print_msg("After MADV_SOFT_OFFLINE nr_hugepages=%ld\n", + nr_hugepages_after); + + // No need for the hugetlbfs file from now on. + close(fd); + + if (enable_soft_offline) { + if (nr_hugepages_before != nr_hugepages_after + 1) { + ksft_test_result_fail("MADV_SOFT_OFFLINE should reduced 1 hugepage\n"); + return; + } + } else { + if (nr_hugepages_before != nr_hugepages_after) { + ksft_test_result_fail("MADV_SOFT_OFFLINE reduced %lu hugepages\n", + nr_hugepages_before - nr_hugepages_after); + return; + } + } + + ksft_test_result(ret == 0, + "Test soft-offline when enabled_soft_offline=%d\n", + enable_soft_offline); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(2); + + test_soft_offline_common(1); + test_soft_offline_common(0); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c new file mode 100644 index 000000000000..db63abe5ee5e --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program tests for hugepage leaks after DIO writes to a file using a + * hugepage as the user buffer. During DIO, the user buffer is pinned and + * should be properly unpinned upon completion. This patch verifies that the + * kernel correctly unpins the buffer at DIO completion for both aligned and + * unaligned user buffer offsets (w.r.t page boundary), ensuring the hugepage + * is freed upon unmapping. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> +#include "vm_util.h" +#include "../kselftest.h" + +void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) +{ + int fd; + char *buffer = NULL; + char *orig_buffer = NULL; + size_t h_pagesize = 0; + size_t writesize; + int free_hpage_b = 0; + int free_hpage_a = 0; + const int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; + const int mmap_prot = PROT_READ | PROT_WRITE; + + writesize = end_off - start_off; + + /* Get the default huge page size */ + h_pagesize = default_huge_page_size(); + if (!h_pagesize) + ksft_exit_fail_msg("Unable to determine huge page size\n"); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_fail_perror("Error opening file\n"); + + /* Get the free huge pages before allocation */ + free_hpage_b = get_free_hugepages(); + if (free_hpage_b == 0) { + close(fd); + ksft_exit_skip("No free hugepage, exiting!\n"); + } + + /* Allocate a hugetlb page */ + orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0); + if (orig_buffer == MAP_FAILED) { + close(fd); + ksft_exit_fail_perror("Error mapping memory\n"); + } + buffer = orig_buffer; + buffer += start_off; + + memset(buffer, 'A', writesize); + + /* Write the buffer to the file */ + if (write(fd, buffer, writesize) != (writesize)) { + munmap(orig_buffer, h_pagesize); + close(fd); + ksft_exit_fail_perror("Error writing to file\n"); + } + + /* unmap the huge page */ + munmap(orig_buffer, h_pagesize); + close(fd); + + /* Get the free huge pages after unmap*/ + free_hpage_a = get_free_hugepages(); + + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + + /* + * If the no. of free hugepages before allocation and after unmap does + * not match - that means there could still be a page which is pinned. + */ + ksft_test_result(free_hpage_a == free_hpage_b, + "free huge pages from %u-%u\n", start_off, end_off); +} + +int main(void) +{ + size_t pagesize = 0; + int fd; + + ksft_print_header(); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_skip("Unable to allocate file: %s\n", strerror(errno)); + close(fd); + + /* Check if huge pages are free */ + if (!get_free_hugepages()) + ksft_exit_skip("No free hugepage, exiting\n"); + + ksft_set_plan(4); + + /* Get base page size */ + pagesize = psize(); + + /* start and end is aligned to pagesize */ + run_dio_using_hugetlb(0, (pagesize * 3)); + + /* start is aligned but end is not aligned */ + run_dio_using_hugetlb(0, (pagesize * 3) - (pagesize / 2)); + + /* start is unaligned and end is aligned */ + run_dio_using_hugetlb(pagesize / 2, (pagesize * 3)); + + /* both start and end are unaligned */ + run_dio_using_hugetlb(pagesize / 2, (pagesize * 3) + (pagesize / 2)); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c index 73b81c632366..e2640529dbb2 100644 --- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c +++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c @@ -5,20 +5,36 @@ #include <sys/mman.h> #include <sys/types.h> #include <unistd.h> +#include <setjmp.h> +#include <signal.h> #include "vm_util.h" #include "../kselftest.h" -#define MMAP_SIZE (1 << 21) #define INLOOP_ITER 100 -char *huge_ptr; +static char *huge_ptr; +static size_t huge_page_size; + +static sigjmp_buf sigbuf; +static bool sigbus_triggered; + +static void signal_handler(int signal) +{ + if (signal == SIGBUS) { + sigbus_triggered = true; + siglongjmp(sigbuf, 1); + } +} /* Touch the memory while it is being madvised() */ void *touch(void *unused) { char *ptr = (char *)huge_ptr; + if (sigsetjmp(sigbuf, 1)) + return NULL; + for (int i = 0; i < INLOOP_ITER; i++) ptr[0] = '.'; @@ -30,7 +46,7 @@ void *madv(void *unused) usleep(rand() % 10); for (int i = 0; i < INLOOP_ITER; i++) - madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + madvise(huge_ptr, huge_page_size, MADV_DONTNEED); return NULL; } @@ -44,9 +60,23 @@ int main(void) * interactions */ int max = 10000; + int err; + + ksft_print_header(); + ksft_set_plan(1); srand(getpid()); + if (signal(SIGBUS, signal_handler) == SIG_ERR) + ksft_exit_skip("Could not register signal handler."); + + huge_page_size = default_huge_page_size(); + if (!huge_page_size) + ksft_exit_skip("Could not detect default hugetlb page size."); + + ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n", + huge_page_size / 1024); + free_hugepages = get_free_hugepages(); if (free_hugepages != 1) { ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n", @@ -54,7 +84,7 @@ int main(void) } while (max--) { - huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); @@ -66,8 +96,14 @@ int main(void) pthread_join(thread1, NULL); pthread_join(thread2, NULL); - munmap(huge_ptr, MMAP_SIZE); + munmap(huge_ptr, huge_page_size); } - return KSFT_PASS; + ksft_test_result(!sigbus_triggered, "SIGBUS behavior\n"); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c index d01e8d4901d0..8f122a0f0828 100644 --- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c +++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c @@ -27,9 +27,9 @@ #include "vm_util.h" #include "../kselftest.h" -#define MMAP_SIZE (1 << 21) #define INLOOP_ITER 100 +size_t mmap_size; char *huge_ptr; /* Touch the memory while it is being madvised() */ @@ -44,7 +44,7 @@ void *touch(void *unused) void *madv(void *unused) { for (int i = 0; i < INLOOP_ITER; i++) - madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + madvise(huge_ptr, mmap_size, MADV_DONTNEED); return NULL; } @@ -59,7 +59,7 @@ void *map_extra(void *unused) void *ptr; for (int i = 0; i < INLOOP_ITER; i++) { - ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); @@ -93,14 +93,16 @@ int main(void) free_hugepages); } + mmap_size = default_huge_page_size(); + while (max--) { - huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + huge_ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if ((unsigned long)huge_ptr == -1) { - ksft_exit_skip("Failed to allocated huge page\n"); - return KSFT_SKIP; + ksft_test_result_fail("Failed to allocate huge page\n"); + return KSFT_FAIL; } pthread_create(&thread1, NULL, madv, NULL); @@ -117,7 +119,7 @@ int main(void) } /* Unmap and restart */ - munmap(huge_ptr, MMAP_SIZE); + munmap(huge_ptr, mmap_size); } return KSFT_PASS; diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc22..0dd31892ff67 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp -d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi @@ -36,7 +36,7 @@ else do_umount=1 fi fi -MNT='/mnt/huge/' +MNT='/mnt/huge' function get_machine_hugepage_size() { hpz=$(grep -i hugepagesize /proc/meminfo) @@ -56,10 +56,45 @@ function cleanup() { rmdir "$CGROUP_ROOT"/a/b 2>/dev/null rmdir "$CGROUP_ROOT"/a 2>/dev/null rmdir "$CGROUP_ROOT"/test1 2>/dev/null - echo 0 >/proc/sys/vm/nr_hugepages + echo $nr_hugepgs >/proc/sys/vm/nr_hugepages set -e } +function assert_with_retry() { + local actual_path="$1" + local expected="$2" + local tolerance=$((7 * 1024 * 1024)) + local timeout=20 + local interval=1 + local start_time + local now + local elapsed + local actual + + start_time=$(date +%s) + + while true; do + actual="$(cat "$actual_path")" + + if [[ $actual -ge $(($expected - $tolerance)) ]] && + [[ $actual -le $(($expected + $tolerance)) ]]; then + return 0 + fi + + now=$(date +%s) + elapsed=$((now - start_time)) + + if [[ $elapsed -ge $timeout ]]; then + echo "actual = $((${actual%% *} / 1024 / 1024)) MB" + echo "expected = $((${expected%% *} / 1024 / 1024)) MB" + cleanup + exit 1 + fi + + sleep $interval + done +} + function assert_state() { local expected_a="$1" local expected_a_hugetlb="$2" @@ -70,58 +105,13 @@ function assert_state() { expected_b="$3" expected_b_hugetlb="$4" fi - local tolerance=$((5 * 1024 * 1024)) - - local actual_a - actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)" - if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] || - [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then - echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB - echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - local actual_a_hugetlb - actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)" - if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] || - [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then - echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB - echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB - echo fail + assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a" + assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb" - cleanup - exit 1 - fi - - if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then - return - fi - - local actual_b - actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)" - if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] || - [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then - echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB - echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - - local actual_b_hugetlb - actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)" - if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] || - [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then - echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB - echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 + if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then + assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b" + assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb" fi } @@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages. cleanup echo -echo echo Test charge, rmdir, uncharge setup echo mkdir @@ -195,7 +184,6 @@ cleanup echo done echo -echo if [[ ! $cgroup2 ]]; then echo "Test parent and child hugetlb usage" setup @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then assert_state 0 $(($size * 2)) 0 $size rmdir "$CGROUP_ROOT"/a/b - sleep 5 echo Assert memory reparent correctly. assert_state 0 $(($size * 2)) @@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then fi echo -echo echo "Test child only hugetlb usage" echo setup setup diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 829320a519e7..8a4d34cce36b 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1091,11 +1091,11 @@ static void usage(void) fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); - fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n"); fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); - fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon or shmem allocations.\n"); exit(1); } @@ -1209,6 +1209,8 @@ int main(int argc, char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; default_settings.hugepages[anon_order].enabled = THP_ALWAYS; + default_settings.shmem_hugepages[hpage_pmd_order].enabled = SHMEM_INHERIT; + default_settings.shmem_hugepages[anon_order].enabled = SHMEM_ALWAYS; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index d615767e396b..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -28,6 +28,15 @@ #define MiB (1024 * KiB) #define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child" +#define MAP_MERGE_FAIL ((void *)-1) +#define MAP_MERGE_SKIP ((void *)-2) + +enum ksm_merge_mode { + KSM_MERGE_PRCTL, + KSM_MERGE_MADVISE, + KSM_MERGE_NONE, /* PRCTL already set */ +}; + static int mem_fd; static int ksm_fd; static int ksm_full_scans_fd; @@ -146,33 +155,34 @@ static int ksm_unmerge(void) return 0; } -static char *mmap_and_merge_range(char val, unsigned long size, int prot, - bool use_prctl) +static char *__mmap_and_merge_range(char val, unsigned long size, int prot, + enum ksm_merge_mode mode) { char *map; + char *err_map = MAP_MERGE_FAIL; int ret; /* Stabilize accounting by disabling KSM completely. */ if (ksm_unmerge()) { - ksft_test_result_fail("Disabling (unmerging) KSM failed\n"); - return MAP_FAILED; + ksft_print_msg("Disabling (unmerging) KSM failed\n"); + return err_map; } if (get_my_merging_pages() > 0) { - ksft_test_result_fail("Still pages merged\n"); - return MAP_FAILED; + ksft_print_msg("Still pages merged\n"); + return err_map; } map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); - return MAP_FAILED; + ksft_print_msg("mmap() failed\n"); + return err_map; } /* Don't use THP. Ignore if THP are not around on a kernel. */ if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_print_msg("MADV_NOHUGEPAGE failed\n"); goto unmap; } @@ -180,27 +190,36 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot, memset(map, val, size); if (mprotect(map, size, prot)) { - ksft_test_result_skip("mprotect() failed\n"); + ksft_print_msg("mprotect() failed\n"); + err_map = MAP_MERGE_SKIP; goto unmap; } - if (use_prctl) { + switch (mode) { + case KSM_MERGE_PRCTL: ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); if (ret < 0 && errno == EINVAL) { - ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n"); + ksft_print_msg("PR_SET_MEMORY_MERGE not supported\n"); + err_map = MAP_MERGE_SKIP; goto unmap; } else if (ret) { - ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n"); + ksft_print_msg("PR_SET_MEMORY_MERGE=1 failed\n"); goto unmap; } - } else if (madvise(map, size, MADV_MERGEABLE)) { - ksft_test_result_fail("MADV_MERGEABLE failed\n"); - goto unmap; + break; + case KSM_MERGE_MADVISE: + if (madvise(map, size, MADV_MERGEABLE)) { + ksft_print_msg("MADV_MERGEABLE failed\n"); + goto unmap; + } + break; + case KSM_MERGE_NONE: + break; } /* Run KSM to trigger merging and wait. */ if (ksm_merge()) { - ksft_test_result_fail("Running KSM failed\n"); + ksft_print_msg("Running KSM failed\n"); goto unmap; } @@ -209,14 +228,31 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot, * accounted differently (depending on kernel support). */ if (val && !get_my_merging_pages()) { - ksft_test_result_fail("No pages got merged\n"); + ksft_print_msg("No pages got merged\n"); goto unmap; } return map; unmap: munmap(map, size); - return MAP_FAILED; + return err_map; +} + +static char *mmap_and_merge_range(char val, unsigned long size, int prot, + enum ksm_merge_mode mode) +{ + char *map; + char *ret = MAP_FAILED; + + map = __mmap_and_merge_range(val, size, prot, mode); + if (map == MAP_MERGE_FAIL) + ksft_test_result_fail("Merging memory failed"); + else if (map == MAP_MERGE_SKIP) + ksft_test_result_skip("Merging memory skipped"); + else + ret = map; + + return ret; } static void test_unmerge(void) @@ -226,7 +262,7 @@ static void test_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -264,7 +300,7 @@ static void test_unmerge_zero_pages(void) } /* Let KSM deduplicate zero pages. */ - map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -312,7 +348,7 @@ static void test_unmerge_discarded(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -344,7 +380,7 @@ static void test_unmerge_uffd_wp(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -439,6 +475,36 @@ static void test_prctl(void) ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n"); } +static int test_child_ksm(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + /* Test if KSM is enabled for the process. */ + if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1) + return -1; + + /* Test if merge could really happen. */ + map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE); + if (map == MAP_MERGE_FAIL) + return -2; + else if (map == MAP_MERGE_SKIP) + return -3; + + munmap(map, size); + return 0; +} + +static void test_child_ksm_err(int status) +{ + if (status == -1) + ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); + else if (status == -2) + ksft_test_result_fail("Merge in child failed\n"); + else if (status == -3) + ksft_test_result_skip("Merge in child skipped\n"); +} + /* Verify that prctl ksm flag is inherited. */ static void test_prctl_fork(void) { @@ -458,7 +524,7 @@ static void test_prctl_fork(void) child_pid = fork(); if (!child_pid) { - exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0)); + exit(test_child_ksm()); } else if (child_pid < 0) { ksft_test_result_fail("fork() failed\n"); return; @@ -467,8 +533,11 @@ static void test_prctl_fork(void) if (waitpid(child_pid, &status, 0) < 0) { ksft_test_result_fail("waitpid() failed\n"); return; - } else if (WEXITSTATUS(status) != 1) { - ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); + } + + status = WEXITSTATUS(status); + if (status) { + test_child_ksm_err(status); return; } @@ -480,12 +549,6 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); } -static int ksm_fork_exec_child(void) -{ - /* Test if KSM is enabled for the process. */ - return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1; -} - static void test_prctl_fork_exec(void) { int ret, status; @@ -518,7 +581,7 @@ static void test_prctl_fork_exec(void) if (WIFEXITED(status)) { status = WEXITSTATUS(status); if (status) { - ksft_test_result_fail("KSM not enabled\n"); + test_child_ksm_err(status); return; } } else { @@ -545,7 +608,7 @@ static void test_prctl_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_PRCTL); if (map == MAP_FAILED) return; @@ -568,7 +631,7 @@ static void test_prot_none(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0x11, size, PROT_NONE, false); + map = mmap_and_merge_range(0x11, size, PROT_NONE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) goto unmap; @@ -593,13 +656,34 @@ unmap: munmap(map, size); } +static void init_global_file_handles(void) +{ + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); +} + int main(int argc, char **argv) { unsigned int tests = 8; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { - exit(ksm_fork_exec_child() == 1 ? 0 : 1); + init_global_file_handles(); + exit(test_child_ksm()); } #ifdef __NR_userfaultfd @@ -611,22 +695,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - mem_fd = open("/proc/self/mem", O_RDWR); - if (mem_fd < 0) - ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - if (pagemap_fd < 0) - ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + init_global_file_handles(); test_unmerge(); test_unmerge_zero_pages(); @@ -646,5 +715,5 @@ int main(int argc, char **argv) if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", err, ksft_test_num()); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index b748c48908d9..e80deac1436b 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -58,40 +58,12 @@ int debug; static int ksm_write_sysfs(const char *file_path, unsigned long val) { - FILE *f = fopen(file_path, "w"); - - if (!f) { - fprintf(stderr, "f %s\n", file_path); - perror("fopen"); - return 1; - } - if (fprintf(f, "%lu", val) < 0) { - perror("fprintf"); - fclose(f); - return 1; - } - fclose(f); - - return 0; + return write_sysfs(file_path, val); } static int ksm_read_sysfs(const char *file_path, unsigned long *val) { - FILE *f = fopen(file_path, "r"); - - if (!f) { - fprintf(stderr, "f %s\n", file_path); - perror("fopen"); - return 1; - } - if (fscanf(f, "%lu", val) != 1) { - perror("fscanf"); - fclose(f); - return 1; - } - fclose(f); - - return 0; + return read_sysfs(file_path, val); } static void ksm_print_sysfs(void) @@ -776,7 +748,7 @@ err_out: int main(int argc, char *argv[]) { - int ret, opt; + int ret = 0, opt; int prot = 0; int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT; int merge_type = KSM_MERGE_TYPE_DEFAULT; diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index 17bcb07f19f3..b6fabd5c27ed 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -172,12 +172,12 @@ static void test_populate_read(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "read range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_READ); ksft_test_result(!ret, "MADV_POPULATE_READ\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "read range is populated\n"); munmap(addr, SIZE); } @@ -194,12 +194,12 @@ static void test_populate_write(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "write range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "write range is populated\n"); munmap(addr, SIZE); } @@ -247,19 +247,19 @@ static void test_softdirty(void) /* Clear any softdirty bits. */ clear_softdirty(); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "cleared range is not softdirty\n"); /* Populating READ should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_READ); - ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n"); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "range is not softdirty after MADV_POPULATE_READ\n"); /* Populating WRITE should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); - ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_softdirty(addr, SIZE), - "range is softdirty\n"); + "range is softdirty after MADV_POPULATE_WRITE \n"); munmap(addr, SIZE); } @@ -307,5 +307,5 @@ int main(int argc, char **argv) if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", err, ksft_test_num()); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index b74813fdc951..1e9980b8993c 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -67,7 +67,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: munmap failed!?\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); addr = base_addr + page_size; size = 3 * page_size; @@ -76,7 +77,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Exact same mapping again: @@ -93,7 +95,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: @@ -111,7 +114,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Overlap end of existing mapping: @@ -128,7 +132,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n"); /* * Overlap start of existing mapping: @@ -145,7 +150,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n"); /* * Adjacent to start of existing mapping: @@ -162,7 +168,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base\n"); /* * Adjacent to end of existing mapping: @@ -179,7 +186,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n"); addr = base_addr; size = 5 * page_size; diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index a1f005a90a4f..b47399feab53 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -4,11 +4,6 @@ * system call with MAP_HUGETLB flag. Before running this program make * sure the administrator has allocated enough default sized huge pages * to cover the 256 MB allocation. - * - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. */ #include <stdlib.h> #include <stdio.h> @@ -21,15 +16,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - static void check_bytes(char *addr) { ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); @@ -60,7 +46,7 @@ int main(int argc, char **argv) void *addr; size_t hugepage_size; size_t length = LENGTH; - int flags = FLAGS; + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; int shift = 0; hugepage_size = default_huge_page_size(); @@ -85,7 +71,7 @@ int main(int argc, char **argv) ksft_print_msg("Default size hugepages\n"); ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20); - addr = mmap(ADDR, length, PROTECTION, flags, -1, 0); + addr = mmap(NULL, length, PROTECTION, flags, -1, 0); if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap: %s\n", strerror(errno)); diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 5c8a53869b1b..9df2636c829b 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -18,6 +18,8 @@ #include <unistd.h> #include "../kselftest.h" +#include "vm_util.h" + #define MMAP_SZ 4096 #define BUG_ON(condition, description) \ @@ -87,6 +89,9 @@ int main(int argc, char **argv) BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); + if (ret < 0 && errno == ENOENT) { + skip_test_dodgy_fs("ftruncate()"); + } BUG_ON(ret, "ftruncate()"); smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 9b298f6a04b3..9a0597310a76 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -20,6 +20,7 @@ #include <unistd.h> #include <errno.h> #include <stdio.h> +#include <fcntl.h> #include "../kselftest.h" @@ -83,6 +84,45 @@ static void test_mlock_limit(int fd) pass("mlock limit is respected\n"); } +static void test_vmsplice(int fd, const char *desc) +{ + ssize_t transferred; + struct iovec iov; + int pipefd[2]; + char *mem; + + if (pipe(pipefd)) { + fail("pipe failed: %s\n", strerror(errno)); + return; + } + + mem = mmap(NULL, page_size, prot, mode, fd, 0); + if (mem == MAP_FAILED) { + fail("Unable to mmap secret memory\n"); + goto close_pipe; + } + + /* + * vmsplice() may use GUP-fast, which must also fail. Prefault the + * page table, so GUP-fast could find it. + */ + memset(mem, PATTERN, page_size); + + iov.iov_base = mem; + iov.iov_len = page_size; + transferred = vmsplice(pipefd[1], &iov, 1, 0); + + if (transferred < 0 && errno == EFAULT) + pass("vmsplice is blocked as expected with %s\n", desc); + else + fail("vmsplice: unexpected memory access with %s\n", desc); + + munmap(mem, page_size); +close_pipe: + close(pipefd[0]); + close(pipefd[1]); +} + static void try_process_vm_read(int fd, int pipefd[2]) { struct iovec liov, riov; @@ -187,7 +227,6 @@ static void test_remote_access(int fd, const char *name, return; } - ftruncate(fd, page_size); memset(mem, PATTERN, page_size); if (write(pipefd[1], &mem, sizeof(mem)) < 0) { @@ -258,7 +297,7 @@ static void prepare(void) strerror(errno)); } -#define NUM_TESTS 4 +#define NUM_TESTS 6 int main(int argc, char *argv[]) { @@ -277,9 +316,17 @@ int main(int argc, char *argv[]) ksft_exit_fail_msg("memfd_secret failed: %s\n", strerror(errno)); } + if (ftruncate(fd, page_size)) + ksft_exit_fail_msg("ftruncate failed: %s\n", strerror(errno)); test_mlock_limit(fd); test_file_apis(fd); + /* + * We have to run the first vmsplice test before any secretmem page was + * allocated for this fd. + */ + test_vmsplice(fd, "fresh page"); + test_vmsplice(fd, "existing page"); test_process_vm_read(fd); test_ptrace(fd); diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c new file mode 100644 index 000000000000..cc26480098ae --- /dev/null +++ b/tools/testing/selftests/mm/merge.c @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/wait.h> +#include <linux/perf_event.h> +#include "vm_util.h" + +FIXTURE(merge) +{ + unsigned int page_size; + char *carveout; + struct procmap_fd procmap; +}; + +FIXTURE_SETUP(merge) +{ + self->page_size = psize(); + /* Carve out PROT_NONE region to map over. */ + self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(self->carveout, MAP_FAILED); + /* Setup PROCMAP_QUERY interface. */ + ASSERT_EQ(open_self_procmap(&self->procmap), 0); +} + +FIXTURE_TEARDOWN(merge) +{ + ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(close_procmap(&self->procmap), 0); +} + +TEST_F(merge, mprotect_unfaulted_left) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit + * merge failure due to lack of VM_ACCOUNT flag by mistake. + * + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first 5 pages read-only, splitting the VMA: + * + * RO RW + * |-----------|-----------| + * | unfaulted | unfaulted | + * |-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + /* + * Fault in the first of the last 5 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + */ + ptr[5 * page_size] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge (though for + * ~15 years we did not! :): + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the last 5 pages read-only, splitting the VMA: + * + * RW RO + * |-----------|-----------| + * | unfaulted | unfaulted | + * |-----------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + /* + * Fault in the first of the first 5 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RW RO + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + */ + ptr[0] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_both) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first and last 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the middle 3 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | faulted | unfaulted | + * |-----------|-----------|-----------| + */ + ptr[3 * page_size] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_faulted_left_unfaulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the last 3 pages read-only, splitting the VMA: + * + * RW RO + * |-----------------------|-----------| + * | unfaulted | unfaulted | + * |-----------------------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the first 6 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RW RO + * |-----------------------|-----------| + * | unfaulted | unfaulted | + * |-----------------------|-----------| + */ + ptr[0] = 'x'; + /* + * Now make the first 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | faulted | faulted | unfaulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_left_faulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first 3 pages read-only, splitting the VMA: + * + * RO RW + * |-----------|-----------------------| + * | unfaulted | unfaulted | + * |-----------|-----------------------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the last 6 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW + * |-----------|-----------------------| + * | unfaulted | faulted | + * |-----------|-----------------------| + */ + ptr[3 * page_size] = 'x'; + /* + * Now make the last 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | faulted | faulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, forked_target_vma) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + pid_t pid; + char *ptr, *ptr2; + int i; + + /* + * |-----------| + * | unfaulted | + * |-----------| + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in process. + * + * |-----------| + * | faulted | + * |-----------| + */ + ptr[0] = 'x'; + + pid = fork(); + ASSERT_NE(pid, -1); + + if (pid != 0) { + wait(NULL); + return; + } + + /* Child process below: */ + + /* Reopen for child. */ + ASSERT_EQ(close_procmap(&self->procmap), 0); + ASSERT_EQ(open_self_procmap(&self->procmap), 0); + + /* unCOWing everything does not cause the AVC to go away. */ + for (i = 0; i < 5 * page_size; i += page_size) + ptr[i] = 'x'; + + /* + * Map in adjacent VMA in child. + * + * forked + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + */ + ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size); +} + +TEST_F(merge, forked_source_vma) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + pid_t pid; + char *ptr, *ptr2; + int i; + + /* + * |-----------|------------| + * | unfaulted | <unmapped> | + * |-----------|------------| + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in process. + * + * |-----------|------------| + * | faulted | <unmapped> | + * |-----------|------------| + */ + ptr[0] = 'x'; + + pid = fork(); + ASSERT_NE(pid, -1); + + if (pid != 0) { + wait(NULL); + return; + } + + /* Child process below: */ + + /* Reopen for child. */ + ASSERT_EQ(close_procmap(&self->procmap), 0); + ASSERT_EQ(open_self_procmap(&self->procmap), 0); + + /* unCOWing everything does not cause the AVC to go away. */ + for (i = 0; i < 5 * page_size; i += page_size) + ptr[i] = 'x'; + + /* + * Map in adjacent VMA in child, ptr2 after ptr, but incompatible. + * + * forked RW RWX + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + */ + ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); + + /* + * Now mprotect forked region to RWX so it becomes the source for the + * merge to unfaulted region: + * + * forked RWX RWX + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * This should NOT result in a merge, as ptr was forked. + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0); + /* Again, make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); +} + +TEST_F(merge, handle_uprobe_upon_merged_vma) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + unsigned int page_size = self->page_size; + const char *probe_file = "./foo"; + char *carveout = self->carveout; + struct perf_event_attr attr; + unsigned long type; + void *ptr1, *ptr2; + int fd; + + fd = open(probe_file, O_RDWR|O_CREAT, 0600); + ASSERT_GE(fd, 0); + + ASSERT_EQ(ftruncate(fd, page_size), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; + attr.type = type; + attr.config1 = (__u64)(long)probe_file; + attr.config2 = 0x0; + + ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0); + + ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC, + MAP_PRIVATE | MAP_FIXED, fd, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + ptr2 = mremap(ptr1, page_size, 2 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_NE(mremap(ptr2, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); + +out: + close(fd); + remove(probe_file); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 6908569ef406..1e3a595fbf01 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -15,10 +15,10 @@ #include <signal.h> #include <time.h> -#define TWOMEG (2<<20) -#define RUNTIME (20) - -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +#define TWOMEG (2<<20) +#define RUNTIME (20) +#define MAX_RETRIES 100 +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) FIXTURE(migration) { @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2) int ret, tmp; int status = 0; struct timespec ts1, ts2; + int failures = 0; if (clock_gettime(CLOCK_MONOTONIC, &ts1)) return -1; @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2) ret = move_pages(0, 1, (void **) &ptr, &n2, &status, MPOL_MF_MOVE_ALL); if (ret) { - if (ret > 0) + if (ret > 0) { + /* Migration is best effort; try again */ + if (++failures < MAX_RETRIES) + continue; printf("Didn't migrate %d pages\n", ret); + } else perror("Couldn't migrate pages"); return -2; } - + failures = 0; tmp = n2; n2 = n1; n1 = tmp; @@ -199,4 +204,103 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) ASSERT_EQ(pthread_cancel(self->threads[i]), 0); } +/* + * migration test with shared anon THP page + */ + +TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG); + ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); + access_mem(ptr); + } else { + self->pids[i] = pid; + } + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + +/* + * migration test with private anon hugetlb page + */ +TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +/* + * migration test with shared anon hugetlb page + */ +TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); + access_mem(ptr); + } else { + self->pids[i] = pid; + } + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index 301abb99e027..09feeb453646 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -281,6 +281,7 @@ static void test_uffdio_copy(void) dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0); if (dst == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); + free(src); return; } @@ -375,5 +376,5 @@ int main(void) if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", err, ksft_test_num()); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c index 1cd80b0f76c3..b8d7e966f44c 100644 --- a/tools/testing/selftests/mm/mlock-random-test.c +++ b/tools/testing/selftests/mm/mlock-random-test.c @@ -161,9 +161,9 @@ static void test_mlock_within_limit(char *p, int alloc_size) MLOCK_ONFAULT); if (ret) - ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n", is_mlock ? "mlock" : "mlock2", - p, alloc_size, + strerror(errno), p, alloc_size, p + start_offset, lock_size); } diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 26f744188ad0..3e90ff37e336 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -20,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) FILE *file; int ret = 1; char line[1024] = {0}; - char *end_addr; - char *stop; unsigned long start; unsigned long end; @@ -37,21 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) memset(area, 0, sizeof(struct vm_boundaries)); while(fgets(line, 1024, file)) { - end_addr = strchr(line, '-'); - if (!end_addr) { + if (sscanf(line, "%lx-%lx", &start, &end) != 2) { ksft_print_msg("cannot parse /proc/self/maps\n"); goto out; } - *end_addr = '\0'; - end_addr++; - stop = strchr(end_addr, ' '); - if (!stop) { - ksft_print_msg("cannot parse /proc/self/maps\n"); - goto out; - } - - sscanf(line, "%lx", &start); - sscanf(end_addr, "%lx", &end); if (start <= addr && end > addr) { area->start = start; @@ -209,7 +196,7 @@ static void test_mlock_lock(void) ksft_exit_fail_msg("munlock(): %s\n", strerror(errno)); } - ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__); + ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__); munmap(map, 2 * page_size); } diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..81e77fa41901 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -6,7 +6,13 @@ static int mlock2_(void *start, size_t len, int flags) { - return syscall(__NR_mlock2, start, len, flags); + int ret = syscall(__NR_mlock2, start, len, flags); + + if (ret) { + errno = ret; + return -1; + } + return 0; } static FILE *seek_to_smaps_entry(unsigned long addr) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 2f8b991f78cb..bb84476a177f 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -22,7 +22,10 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) @@ -31,7 +34,7 @@ struct config { unsigned long long dest_alignment; unsigned long long region_size; int overlapping; - int dest_preamble_size; + unsigned int dest_preamble_size; }; struct test { @@ -69,6 +72,27 @@ enum { .expect_failure = should_fail \ } +/* compute square root using binary search */ +static unsigned long get_sqrt(unsigned long val) +{ + unsigned long low = 1; + + /* assuming rand_size is less than 1TB */ + unsigned long high = (1UL << 20); + + while (low <= high) { + unsigned long mid = low + (high - low) / 2; + unsigned long temp = mid * mid; + + if (temp == val) + return mid; + if (temp < val) + low = mid + 1; + high = mid - 1; + } + return low; +} + /* * Returns false if the requested remap region overlaps with an * existing mapping (e.g text, stack) else returns true. @@ -126,19 +150,21 @@ static unsigned long long get_mmap_min_addr(void) * Using /proc/self/maps, assert that the specified address range is contained * within a single mapping. */ -static bool is_range_mapped(FILE *maps_fp, void *start, void *end) +static bool is_range_mapped(FILE *maps_fp, unsigned long start, + unsigned long end) { char *line = NULL; size_t len = 0; bool success = false; + unsigned long first_val, second_val; rewind(maps_fp); while (getline(&line, &len, maps_fp) != -1) { - char *first = strtok(line, "- "); - void *first_val = (void *)strtol(first, NULL, 16); - char *second = strtok(NULL, "- "); - void *second_val = (void *) strtol(second, NULL, 16); + if (sscanf(line, "%lx-%lx", &first_val, &second_val) != 2) { + ksft_exit_fail_msg("cannot parse /proc/self/maps\n"); + break; + } if (first_val <= start && second_val >= end) { success = true; @@ -233,7 +259,8 @@ static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size) goto out; } - success = is_range_mapped(maps_fp, start, start + 3 * page_size); + success = is_range_mapped(maps_fp, (unsigned long)start, + (unsigned long)(start + 3 * page_size)); munmap(start, 3 * page_size); out: @@ -272,7 +299,8 @@ static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size) goto out; } - success = is_range_mapped(maps_fp, start, start + 3 * page_size); + success = is_range_mapped(maps_fp, (unsigned long)start, + (unsigned long)(start + 3 * page_size)); munmap(start, 3 * page_size); out: @@ -296,11 +324,11 @@ out: * * |DDDDddddSSSSssss| */ -static void mremap_move_within_range(char pattern_seed) +static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr) { char *test_name = "mremap mremap move within range"; void *src, *dest; - int i, success = 1; + unsigned int i, success = 1; size_t size = SIZE_MB(20); void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, @@ -316,10 +344,7 @@ static void mremap_move_within_range(char pattern_seed) src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1)); /* Set byte pattern for source block. */ - srand(pattern_seed); - for (i = 0; i < SIZE_MB(2); i++) { - ((char *)src)[i] = (char) rand(); - } + memcpy(src, rand_addr, SIZE_MB(2)); dest = src - SIZE_MB(2); @@ -357,14 +382,14 @@ out: /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, - char pattern_seed) + char *rand_addr) { - void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - int d; - unsigned long long t; + void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL; + unsigned long long t, d; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; + unsigned long num_chunks; if (threshold_mb == VALIDATION_NO_THRESHOLD) threshold = c.region_size; @@ -378,9 +403,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } /* Set byte pattern for source block. */ - srand(pattern_seed); - for (t = 0; t < threshold; t++) - memset((char *) src_addr + t, (char) rand(), 1); + memcpy(src_addr, rand_addr, threshold); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,9 +443,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } /* Set byte pattern for the dest preamble block. */ - srand(pattern_seed); - for (d = 0; d < c.dest_preamble_size; d++) - memset((char *) dest_preamble_addr + d, (char) rand(), 1); + memcpy(dest_preamble_addr, rand_addr, c.dest_preamble_size); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -436,15 +457,42 @@ static long long remap_region(struct config c, unsigned int threshold_mb, goto clean_up_dest_preamble; } - /* Verify byte pattern after remapping */ - srand(pattern_seed); - for (t = 0; t < threshold; t++) { - char c = (char) rand(); + /* + * Verify byte pattern after remapping. Employ an algorithm with a + * square root time complexity in threshold: divide the range into + * chunks, if memcmp() returns non-zero, only then perform an + * iteration in that chunk to find the mismatch index. + */ + num_chunks = get_sqrt(threshold); + for (unsigned long i = 0; i < num_chunks; ++i) { + size_t chunk_size = threshold / num_chunks; + unsigned long shift = i * chunk_size; + + if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size)) + continue; + + /* brute force iteration only over mismatch segment */ + for (t = shift; t < shift + chunk_size; ++t) { + if (((char *) dest_addr)[t] != rand_addr[t]) { + ksft_print_msg("Data after remap doesn't match at offset %llu\n", + t); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff, + ((char *) dest_addr)[t] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + } - if (((char *) dest_addr)[t] != c) { + /* + * if threshold is not divisible by num_chunks, then check the + * last chunk + */ + for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) { + if (((char *) dest_addr)[t] != rand_addr[t]) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - t); - ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + t); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff, ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; @@ -452,22 +500,44 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } /* Verify the dest preamble byte pattern after remapping */ - if (c.dest_preamble_size) { - srand(pattern_seed); - for (d = 0; d < c.dest_preamble_size; d++) { - char c = (char) rand(); - - if (((char *) dest_preamble_addr)[d] != c) { - ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - d); - ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[d] & 0xff); + if (!c.dest_preamble_size) + goto no_preamble; + + num_chunks = get_sqrt(c.dest_preamble_size); + + for (unsigned long i = 0; i < num_chunks; ++i) { + size_t chunk_size = c.dest_preamble_size / num_chunks; + unsigned long shift = i * chunk_size; + + if (!memcmp(dest_preamble_addr + shift, rand_addr + shift, + chunk_size)) + continue; + + /* brute force iteration only over mismatched segment */ + for (d = shift; d < shift + chunk_size; ++d) { + if (((char *) dest_preamble_addr)[d] != rand_addr[d]) { + ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n", + d); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff, + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } } } + for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) { + if (((char *) dest_preamble_addr)[d] != rand_addr[d]) { + ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n", + d); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff, + ((char *) dest_preamble_addr)[d] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + +no_preamble: start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec; end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec; ret = end_ns - start_ns; @@ -494,11 +564,12 @@ out: * the beginning of the mapping just because the aligned * down address landed on a mapping that maybe does not exist. */ -static void mremap_move_1mb_from_start(char pattern_seed) +static void mremap_move_1mb_from_start(unsigned int pattern_seed, + char *rand_addr) { char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src"; void *src = NULL, *dest = NULL; - int i, success = 1; + unsigned int i, success = 1; /* Config to reuse get_source_mapping() to do an aligned mmap. */ struct config c = { @@ -520,10 +591,7 @@ static void mremap_move_1mb_from_start(char pattern_seed) } /* Set byte pattern for source block. */ - srand(pattern_seed); - for (i = 0; i < SIZE_MB(2); i++) { - ((char *)src)[i] = (char) rand(); - } + memcpy(src, rand_addr, SIZE_MB(2)); /* * Unmap the beginning of dest so that the aligned address @@ -568,10 +636,10 @@ out: static void run_mremap_test_case(struct test test_case, int *failures, unsigned int threshold_mb, - unsigned int pattern_seed) + char *rand_addr) { long long remap_time = remap_region(test_case.config, threshold_mb, - pattern_seed); + rand_addr); if (remap_time < 0) { if (test_case.expect_failure) @@ -640,9 +708,18 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb, int main(int argc, char **argv) { int failures = 0; - int i, run_perf_tests; + unsigned int i; + int run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; + + /* hard-coded test configs */ + size_t max_test_variable_region_size = _2GB; + size_t max_test_constant_region_size = _2MB; + size_t dest_preamble_size = 10 * _4MB; + unsigned int pattern_seed; + char *rand_addr; + size_t rand_size; int num_expand_tests = 2; int num_misc_tests = 2; struct test test_cases[MAX_TEST] = {}; @@ -659,6 +736,31 @@ int main(int argc, char **argv) ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n", threshold_mb, pattern_seed); + /* + * set preallocated random array according to test configs; see the + * functions for the logic of setting the size + */ + if (!threshold_mb) + rand_size = MAX(max_test_variable_region_size, + max_test_constant_region_size); + else + rand_size = MAX(MIN(threshold_mb * _1MB, + max_test_variable_region_size), + max_test_constant_region_size); + rand_size = MAX(dest_preamble_size, rand_size); + + rand_addr = (char *)mmap(NULL, rand_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (rand_addr == MAP_FAILED) { + perror("mmap"); + ksft_exit_fail_msg("cannot mmap rand_addr\n"); + } + + /* fill stream of random bytes */ + srand(pattern_seed); + for (unsigned long i = 0; i < rand_size; ++i) + rand_addr[i] = (char) rand(); + page_size = sysconf(_SC_PAGESIZE); /* Expected mremap failures */ @@ -730,13 +832,13 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, - pattern_seed); + rand_addr); maps_fp = fopen("/proc/self/maps", "r"); if (maps_fp == NULL) { - ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno)); - exit(KSFT_FAIL); + munmap(rand_addr, rand_size); + ksft_exit_fail_msg("Failed to read /proc/self/maps: %s\n", strerror(errno)); } mremap_expand_merge(maps_fp, page_size); @@ -744,17 +846,20 @@ int main(int argc, char **argv) fclose(maps_fp); - mremap_move_within_range(pattern_seed); - mremap_move_1mb_from_start(pattern_seed); + mremap_move_within_range(pattern_seed, rand_addr); + mremap_move_1mb_from_start(pattern_seed, rand_addr); if (run_perf_tests) { ksft_print_msg("\n%s\n", "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++) run_mremap_test_case(perf_test_cases[i], &failures, - threshold_mb, pattern_seed); + threshold_mb, + rand_addr); } + munmap(rand_addr, rand_size); + if (failures > 0) ksft_exit_fail(); else diff --git a/tools/testing/selftests/mm/mseal_helpers.h b/tools/testing/selftests/mm/mseal_helpers.h new file mode 100644 index 000000000000..0cfce31c76d2 --- /dev/null +++ b/tools/testing/selftests/mm/mseal_helpers.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define FAIL_TEST_IF_FALSE(test_passed) \ + do { \ + if (!(test_passed)) { \ + ksft_test_result_fail("%s: line:%d\n", \ + __func__, __LINE__); \ + return; \ + } \ + } while (0) + +#define SKIP_TEST_IF_FALSE(test_passed) \ + do { \ + if (!(test_passed)) { \ + ksft_test_result_skip("%s: line:%d\n", \ + __func__, __LINE__); \ + return; \ + } \ + } while (0) + +#define REPORT_TEST_PASS() ksft_test_result_pass("%s\n", __func__) + +#ifndef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 +#endif + +#ifndef PKEY_BITS_PER_PKEY +#define PKEY_BITS_PER_PKEY 2 +#endif + +#ifndef PKEY_MASK +#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif + +#ifndef u64 +#define u64 unsigned long long +#endif diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c new file mode 100644 index 000000000000..005f29c86484 --- /dev/null +++ b/tools/testing/selftests/mm/mseal_test.c @@ -0,0 +1,1989 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/mman.h> +#include <sys/mman.h> +#include <stdint.h> +#include <asm-generic/unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <stdbool.h> +#include "../kselftest.h" +#include <syscall.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/vfs.h> +#include <sys/stat.h> +#include "mseal_helpers.h" + +static unsigned long get_vma_size(void *addr, int *prot) +{ + FILE *maps; + char line[256]; + int size = 0; + uintptr_t addr_start, addr_end; + char protstr[5]; + *prot = 0; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) + return 0; + + while (fgets(line, sizeof(line), maps)) { + if (sscanf(line, "%lx-%lx %4s", &addr_start, &addr_end, protstr) == 3) { + if (addr_start == (uintptr_t) addr) { + size = addr_end - addr_start; + if (protstr[0] == 'r') + *prot |= 0x4; + if (protstr[1] == 'w') + *prot |= 0x2; + if (protstr[2] == 'x') + *prot |= 0x1; + break; + } + } + } + fclose(maps); + return size; +} + +/* + * define sys_xyx to call syscall directly. + */ +static int sys_mseal(void *start, size_t len) +{ + int sret; + + errno = 0; + sret = syscall(__NR_mseal, start, len, 0); + return sret; +} + +static int sys_mprotect(void *ptr, size_t size, unsigned long prot) +{ + int sret; + + errno = 0; + sret = syscall(__NR_mprotect, ptr, size, prot); + return sret; +} + +static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + errno = 0; + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); + return sret; +} + +static int sys_munmap(void *ptr, size_t size) +{ + int sret; + + errno = 0; + sret = syscall(__NR_munmap, ptr, size); + return sret; +} + +static int sys_madvise(void *start, size_t len, int types) +{ + int sret; + + errno = 0; + sret = syscall(__NR_madvise, start, len, types); + return sret; +} + +static void *sys_mremap(void *addr, size_t old_len, size_t new_len, + unsigned long flags, void *new_addr) +{ + void *sret; + + errno = 0; + sret = (void *) syscall(__NR_mremap, addr, old_len, new_len, flags, new_addr); + return sret; +} + +static int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(__NR_pkey_alloc, flags, init_val); + + return ret; +} + +static unsigned int __read_pkey_reg(void) +{ + unsigned int pkey_reg = 0; +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax, edx; + unsigned int ecx = 0; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkey_reg = eax; +#endif + return pkey_reg; +} + +static void __write_pkey_reg(u64 pkey_reg) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax = pkey_reg; + unsigned int ecx = 0; + unsigned int edx = 0; + + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); +#endif +} + +static unsigned long pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + unsigned long shift = pkey_bit_position(pkey); + + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + /* OR in new bits for pkey */ + reg |= (flags & PKEY_MASK) << shift; + return reg; +} + +static void set_pkey(int pkey, unsigned long pkey_value) +{ + u64 new_pkey_reg; + + new_pkey_reg = set_pkey_bits(__read_pkey_reg(), pkey, pkey_value); + __write_pkey_reg(new_pkey_reg); +} + +static void setup_single_address(int size, void **ptrOut) +{ + void *ptr; + + ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + *ptrOut = ptr; +} + +static void setup_single_address_rw(int size, void **ptrOut) +{ + void *ptr; + unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); + *ptrOut = ptr; +} + +static int clean_single_address(void *ptr, int size) +{ + int ret; + ret = munmap(ptr, size); + return ret; +} + +static int seal_single_address(void *ptr, int size) +{ + int ret; + ret = sys_mseal(ptr, size); + return ret; +} + +bool seal_support(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr == (void *) -1) + return false; + + ret = sys_mseal(ptr, page_size); + if (ret < 0) + return false; + + return true; +} + +bool pkey_supported(void) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); + + if (pkey > 0) + return true; +#endif + return false; +} + +static void test_seal_addseal(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_unmapped_start(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* munmap 2 pages from ptr. */ + ret = sys_munmap(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail because 2 pages from ptr are unmapped. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail because 2 pages from ptr are unmapped. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + + ret = sys_mseal(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_unmapped_middle(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* munmap 2 pages from ptr + page. */ + ret = sys_munmap(ptr + page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail, since middle 2 pages are unmapped. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail as well. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* we still can add seal to the first page and last page*/ + ret = sys_mseal(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mseal(ptr + 3 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_unmapped_end(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap last 2 pages. */ + ret = sys_munmap(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail since last 2 pages are unmapped. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail as well. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* The first 2 pages is not sealed, and can add seals */ + ret = sys_mseal(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_multiple_vmas(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split the vma into 3. */ + ret = sys_mprotect(ptr + page_size, 2 * page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will get applied to all 4 pages - 3 VMAs. */ + ret = sys_mprotect(ptr, size, PROT_READ); + FAIL_TEST_IF_FALSE(!ret); + + /* use mprotect to split the vma into 3. */ + ret = sys_mprotect(ptr + page_size, 2 * page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* mseal get applied to all 4 pages - 3 VMAs. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_split_start(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split at middle */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the first page, this will split the VMA */ + ret = sys_mseal(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* add seal to the remain 3 pages */ + ret = sys_mseal(ptr + page_size, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_split_end(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split at middle */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the last page */ + ret = sys_mseal(ptr + 3 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* Adding seals to the first 3 pages */ + ret = sys_mseal(ptr, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_invalid_input(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(8 * page_size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + ret = clean_single_address(ptr + 4 * page_size, 4 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* invalid flag */ + ret = syscall(__NR_mseal, ptr, size, 0x20); + FAIL_TEST_IF_FALSE(ret < 0); + + /* unaligned address */ + ret = sys_mseal(ptr + 1, 2 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* length too big */ + ret = sys_mseal(ptr, 5 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* length overflow */ + ret = sys_mseal(ptr, UINT64_MAX/page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* start is not in a valid VMA */ + ret = sys_mseal(ptr - page_size, 5 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + REPORT_TEST_PASS(); +} + +static void test_seal_zero_length(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_mprotect(ptr, 0, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal 0 length will be OK, same as mprotect */ + ret = sys_mseal(ptr, 0); + FAIL_TEST_IF_FALSE(!ret); + + /* verify the 4 pages are not sealed by previous call. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_zero_address(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int prot; + + /* use mmap to change protection. */ + ptr = mmap(0, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + FAIL_TEST_IF_FALSE(ptr == 0); + + size = get_vma_size(ptr, &prot); + FAIL_TEST_IF_FALSE(size == 4 * page_size); + + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + /* verify the 4 pages are sealed by previous call. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_twice(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + /* apply the same seal will be OK. idempotent. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_start_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* the first page is sealed. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* pages after the first page is not sealed. */ + ret = sys_mprotect(ptr + page_size, page_size * 3, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_end_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr + page_size, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* first page is not sealed */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* last 3 page are sealed */ + ret = sys_mprotect(ptr + page_size, page_size * 3, + PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_unalign_len(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, page_size * 2 - 1); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 2 pages are sealed. */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mprotect(ptr + page_size * 2, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_unalign_len_variant_2(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + if (seal) { + ret = seal_single_address(ptr, page_size * 2 + 1); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 3 pages are sealed. */ + ret = sys_mprotect(ptr, page_size * 3, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mprotect(ptr + page_size * 3, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_two_vma(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = seal_single_address(ptr, page_size * 4); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mprotect(ptr + page_size * 2, page_size * 2, + PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_two_vma_with_split(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split as two vma. */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* mseal can apply across 2 vma, also split them. */ + if (seal) { + ret = seal_single_address(ptr + page_size, page_size * 2); + FAIL_TEST_IF_FALSE(!ret); + } + + /* the first page is not sealed. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* the second page is sealed. */ + ret = sys_mprotect(ptr + page_size, page_size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* the third page is sealed. */ + ret = sys_mprotect(ptr + 2 * page_size, page_size, + PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* the fouth page is not sealed. */ + ret = sys_mprotect(ptr + 3 * page_size, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_partial_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* seal one page. */ + if (seal) { + ret = seal_single_address(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mprotect first 2 page will fail, since the first page are sealed. */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_partial_mprotect_tail(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + int prot; + + /* + * Check if a partial mseal (that results in two vmas) works correctly. + * It might mprotect the first, but it'll never touch the second (msealed) vma. + */ + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, size, PROT_EXEC); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0); + FAIL_TEST_IF_FALSE(prot == 0x4); + } + + REPORT_TEST_PASS(); +} + + +static void test_seal_mprotect_two_vma_with_gap(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* use mprotect to split. */ + ret = sys_mprotect(ptr + 3 * page_size, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* use munmap to free two pages in the middle */ + ret = sys_munmap(ptr + page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail, because there is a gap in the address. */ + /* notes, internally mprotect still updated the first page. */ + ret = sys_mprotect(ptr, 4 * page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail as well. */ + ret = sys_mseal(ptr, 4 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* the first page is not sealed. */ + ret = sys_mprotect(ptr, page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret == 0); + + /* the last page is not sealed. */ + ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret == 0); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_split(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal all 4 pages. */ + if (seal) { + ret = sys_mseal(ptr, 4 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mprotect is sealed. */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + + ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_mprotect_merge(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split one page. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal first two pages. */ + if (seal) { + ret = sys_mseal(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 2 pages are sealed. */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* last 2 pages are not sealed. */ + ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret == 0); + + REPORT_TEST_PASS(); +} + +static void test_seal_munmap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 4 pages are sealed. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +/* + * allocate 4 pages, + * use mprotect to split it as two VMAs + * seal the whole range + * munmap will fail on both + */ +static void test_seal_munmap_two_vma(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_munmap(ptr, page_size * 2); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr + page_size, page_size * 2); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +/* + * allocate a VMA with 4 pages. + * munmap the middle 2 pages. + * seal the whole 4 pages, will fail. + * munmap the first page will be OK. + * munmap the last page will be OK. + */ +static void test_seal_munmap_vma_with_gap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_munmap(ptr + page_size, page_size * 2); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + /* can't have gap in the middle. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + } + + ret = sys_munmap(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr + page_size * 2, page_size); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_munmap_partial_across_vmas(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0); + FAIL_TEST_IF_FALSE(prot == 0x4); + } + + REPORT_TEST_PASS(); +} + +static void test_munmap_start_freed(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap the first page. */ + ret = sys_munmap(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the last 3 pages. */ + if (seal) { + ret = sys_mseal(ptr + page_size, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* unmap from the first page. */ + ret = sys_munmap(ptr, size); + if (seal) { + FAIL_TEST_IF_FALSE(ret < 0); + + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == page_size * 3); + } else { + /* note: this will be OK, even the first page is */ + /* already unmapped. */ + FAIL_TEST_IF_FALSE(!ret); + + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 0); + } + + REPORT_TEST_PASS(); +} + +static void test_munmap_end_freed(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap last page. */ + ret = sys_munmap(ptr + page_size * 3, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the first 3 pages. */ + if (seal) { + ret = sys_mseal(ptr, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* unmap all pages. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_munmap_middle_freed(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap 2 pages in the middle. */ + ret = sys_munmap(ptr + page_size, page_size * 2); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the first page. */ + if (seal) { + ret = sys_mseal(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* munmap all 4 pages. */ + ret = sys_munmap(ptr, size); + if (seal) { + FAIL_TEST_IF_FALSE(ret < 0); + + size = get_vma_size(ptr, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + + size = get_vma_size(ptr + page_size * 3, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + } else { + FAIL_TEST_IF_FALSE(!ret); + + size = get_vma_size(ptr, &prot); + FAIL_TEST_IF_FALSE(size == 0); + + size = get_vma_size(ptr + page_size * 3, &prot); + FAIL_TEST_IF_FALSE(size == 0); + } + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_shrink(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* shrink from 4 pages to 2 pages. */ + ret2 = sys_mremap(ptr, size, 2 * page_size, 0, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == (void *) MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 != (void *) MAP_FAILED); + + } + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_expand(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + /* ummap last 2 pages. */ + ret = sys_munmap(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* expand from 2 page to 4 pages. */ + ret2 = sys_mremap(ptr, 2 * page_size, 4 * page_size, 0, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 == ptr); + + } + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_move(bool seal) +{ + void *ptr, *newPtr; + unsigned long page_size = getpagesize(); + unsigned long size = page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newPtr); + FAIL_TEST_IF_FALSE(newPtr != (void *)-1); + ret = clean_single_address(newPtr, size); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* move from ptr to fixed address. */ + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + + } + + REPORT_TEST_PASS(); +} + +static void test_seal_mmap_overwrite_prot(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* use mmap to change protection. */ + ret2 = mmap(ptr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == ptr); + + REPORT_TEST_PASS(); +} + +static void test_seal_mmap_expand(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 12 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + /* ummap last 4 pages. */ + ret = sys_munmap(ptr + 8 * page_size, 4 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, 8 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* use mmap to expand. */ + ret2 = mmap(ptr, size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == ptr); + + REPORT_TEST_PASS(); +} + +static void test_seal_mmap_shrink(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 12 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* use mmap to shrink. */ + ret2 = mmap(ptr, 8 * page_size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == ptr); + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_shrink_fixed(bool seal) +{ + void *ptr; + void *newAddr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newAddr); + FAIL_TEST_IF_FALSE(newAddr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move and shrink to fixed address */ + ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + newAddr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == newAddr); + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_expand_fixed(bool seal) +{ + void *ptr; + void *newAddr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(page_size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newAddr); + FAIL_TEST_IF_FALSE(newAddr != (void *)-1); + + if (seal) { + ret = sys_mseal(newAddr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move and expand to fixed address */ + ret2 = sys_mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED, + newAddr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == newAddr); + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_move_fixed(bool seal) +{ + void *ptr; + void *newAddr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newAddr); + FAIL_TEST_IF_FALSE(newAddr != (void *)-1); + + if (seal) { + ret = sys_mseal(newAddr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move to fixed address */ + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == newAddr); + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_move_fixed_zero(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * MREMAP_FIXED can move the mapping to zero address + */ + ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 == 0); + } + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_move_dontunmap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move, and don't unmap src addr. */ + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + /* kernel will allocate a new address */ + FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + } + + REPORT_TEST_PASS(); +} + +static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) +{ + void *ptr, *ptr2; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * The new address is any address that not allocated. + * use allocate/free to similate that. + */ + setup_single_address(size, &ptr2); + FAIL_TEST_IF_FALSE(ptr2 != (void *)-1); + ret = sys_munmap(ptr2, size); + FAIL_TEST_IF_FALSE(!ret); + + /* + * remap to any address. + */ + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + (void *) ptr2); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + /* remap success and return ptr2 */ + FAIL_TEST_IF_FALSE(ret2 == ptr2); + } + + REPORT_TEST_PASS(); +} + +static void test_seal_merge_and_split(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size; + int ret; + int prot; + + /* (24 RO) */ + setup_single_address(24 * page_size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect(NONE) to set out boundary */ + /* (1 NONE) (22 RO) (1 NONE) */ + ret = sys_mprotect(ptr, page_size, PROT_NONE); + FAIL_TEST_IF_FALSE(!ret); + ret = sys_mprotect(ptr + 23 * page_size, page_size, PROT_NONE); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 22 * page_size); + FAIL_TEST_IF_FALSE(prot == 4); + + /* use mseal to split from beginning */ + /* (1 NONE) (1 RO_SEAL) (21 RO) (1 NONE) */ + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + size = get_vma_size(ptr + 2 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 21 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* use mseal to split from the end. */ + /* (1 NONE) (1 RO_SEAL) (20 RO) (1 RO_SEAL) (1 NONE) */ + ret = sys_mseal(ptr + 22 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + 22 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + size = get_vma_size(ptr + 2 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 20 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* merge with prev. */ + /* (1 NONE) (2 RO_SEAL) (19 RO) (1 RO_SEAL) (1 NONE) */ + ret = sys_mseal(ptr + 2 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 2 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* merge with after. */ + /* (1 NONE) (2 RO_SEAL) (18 RO) (2 RO_SEALS) (1 NONE) */ + ret = sys_mseal(ptr + 21 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + 21 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 2 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* split and merge from prev */ + /* (1 NONE) (3 RO_SEAL) (17 RO) (2 RO_SEALS) (1 NONE) */ + ret = sys_mseal(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + 1 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 3 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + ret = sys_munmap(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(ret < 0); + ret = sys_mprotect(ptr + 2 * page_size, page_size, PROT_NONE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* split and merge from next */ + /* (1 NONE) (3 RO_SEAL) (16 RO) (3 RO_SEALS) (1 NONE) */ + ret = sys_mseal(ptr + 20 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + FAIL_TEST_IF_FALSE(prot == 0x4); + size = get_vma_size(ptr + 20 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 3 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* merge from middle of prev and middle of next. */ + /* (1 NONE) (22 RO_SEAL) (1 NONE) */ + ret = sys_mseal(ptr + 2 * page_size, 20 * page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 22 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + REPORT_TEST_PASS(); +} + +static void test_seal_discard_ro_anon_on_rw(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address_rw(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't take effect on RW memory. */ + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + /* base seal still apply. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_discard_ro_anon_on_pkey(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int pkey; + + SKIP_TEST_IF_FALSE(pkey_supported()); + + setup_single_address_rw(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); + FAIL_TEST_IF_FALSE(pkey > 0); + + ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't take effect if PKRU allow write. */ + set_pkey(pkey, PKEY_UNRESTRICTED); + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + /* sealing will take effect if PKRU deny write. */ + set_pkey(pkey, PKEY_DISABLE_WRITE); + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* base seal still apply. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_discard_ro_anon_on_filebacked(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int fd; + unsigned long mapflags = MAP_PRIVATE; + + fd = memfd_create("test", 0); + FAIL_TEST_IF_FALSE(fd > 0); + + ret = fallocate(fd, 0, 0, size); + FAIL_TEST_IF_FALSE(!ret); + + ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0); + FAIL_TEST_IF_FALSE(ptr != MAP_FAILED); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't apply for file backed mapping. */ + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + close(fd); + + REPORT_TEST_PASS(); +} + +static void test_seal_discard_ro_anon_on_shared(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED; + + ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't apply for shared mapping. */ + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_discard_ro_anon(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +static void test_seal_discard_across_vmas(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + + +static void test_seal_madvise_nodiscard(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * Test a random madvise flag like MADV_RANDOM that does not touch page + * contents (and thus should work for msealed VMAs). RANDOM also happens to + * share bits with other discard-ish flags like REMOVE. + */ + ret = sys_madvise(ptr, size, MADV_RANDOM); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + +int main(void) +{ + bool test_seal = seal_support(); + + ksft_print_header(); + + if (!test_seal) + ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n"); + + if (!pkey_supported()) + ksft_print_msg("PKEY not supported\n"); + + ksft_set_plan(88); + + test_seal_addseal(); + test_seal_unmapped_start(); + test_seal_unmapped_middle(); + test_seal_unmapped_end(); + test_seal_multiple_vmas(); + test_seal_split_start(); + test_seal_split_end(); + test_seal_invalid_input(); + test_seal_zero_length(); + test_seal_twice(); + + test_seal_mprotect(false); + test_seal_mprotect(true); + + test_seal_start_mprotect(false); + test_seal_start_mprotect(true); + + test_seal_end_mprotect(false); + test_seal_end_mprotect(true); + + test_seal_mprotect_unalign_len(false); + test_seal_mprotect_unalign_len(true); + + test_seal_mprotect_unalign_len_variant_2(false); + test_seal_mprotect_unalign_len_variant_2(true); + + test_seal_mprotect_two_vma(false); + test_seal_mprotect_two_vma(true); + + test_seal_mprotect_two_vma_with_split(false); + test_seal_mprotect_two_vma_with_split(true); + + test_seal_mprotect_partial_mprotect(false); + test_seal_mprotect_partial_mprotect(true); + + test_seal_mprotect_two_vma_with_gap(); + test_seal_mprotect_two_vma_with_gap(); + + test_seal_mprotect_merge(false); + test_seal_mprotect_merge(true); + + test_seal_mprotect_split(false); + test_seal_mprotect_split(true); + + test_seal_mprotect_partial_mprotect_tail(false); + test_seal_mprotect_partial_mprotect_tail(true); + + test_seal_munmap(false); + test_seal_munmap(true); + test_seal_munmap_two_vma(false); + test_seal_munmap_two_vma(true); + test_seal_munmap_vma_with_gap(false); + test_seal_munmap_vma_with_gap(true); + test_seal_munmap_partial_across_vmas(false); + test_seal_munmap_partial_across_vmas(true); + + test_munmap_start_freed(false); + test_munmap_start_freed(true); + test_munmap_middle_freed(false); + test_munmap_middle_freed(true); + test_munmap_end_freed(false); + test_munmap_end_freed(true); + + test_seal_mremap_shrink(false); + test_seal_mremap_shrink(true); + test_seal_mremap_expand(false); + test_seal_mremap_expand(true); + test_seal_mremap_move(false); + test_seal_mremap_move(true); + + test_seal_mremap_shrink_fixed(false); + test_seal_mremap_shrink_fixed(true); + test_seal_mremap_expand_fixed(false); + test_seal_mremap_expand_fixed(true); + test_seal_mremap_move_fixed(false); + test_seal_mremap_move_fixed(true); + test_seal_mremap_move_dontunmap(false); + test_seal_mremap_move_dontunmap(true); + test_seal_mremap_move_fixed_zero(false); + test_seal_mremap_move_fixed_zero(true); + test_seal_mremap_move_dontunmap_anyaddr(false); + test_seal_mremap_move_dontunmap_anyaddr(true); + test_seal_madvise_nodiscard(false); + test_seal_madvise_nodiscard(true); + test_seal_discard_ro_anon(false); + test_seal_discard_ro_anon(true); + test_seal_discard_across_vmas(false); + test_seal_discard_across_vmas(true); + test_seal_discard_ro_anon_on_rw(false); + test_seal_discard_ro_anon_on_rw(true); + test_seal_discard_ro_anon_on_shared(false); + test_seal_discard_ro_anon_on_shared(true); + test_seal_discard_ro_anon_on_filebacked(false); + test_seal_discard_ro_anon_on_filebacked(true); + test_seal_mmap_overwrite_prot(false); + test_seal_mmap_overwrite_prot(true); + test_seal_mmap_expand(false); + test_seal_mmap_expand(true); + test_seal_mmap_shrink(false); + test_seal_mmap_shrink(true); + + test_seal_merge_and_split(); + test_seal_zero_address(); + + test_seal_discard_ro_anon_on_pkey(false); + test_seal_discard_ro_anon_on_pkey(true); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile new file mode 100644 index 000000000000..8c8bb39ffa28 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/Makefile @@ -0,0 +1,18 @@ +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = page_frag_test.ko + +obj-m += page_frag_test.o + +all: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c new file mode 100644 index 000000000000..e806c1866e36 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for page_frag cache + * + * Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com> + */ + +#include <linux/module.h> +#include <linux/cpumask.h> +#include <linux/completion.h> +#include <linux/ptr_ring.h> +#include <linux/kthread.h> +#include <linux/page_frag_cache.h> + +#define TEST_FAILED_PREFIX "page_frag_test failed: " + +static struct ptr_ring ptr_ring; +static int nr_objs = 512; +static atomic_t nthreads; +static struct completion wait; +static struct page_frag_cache test_nc; +static int test_popped; +static int test_pushed; +static bool force_exit; + +static int nr_test = 2000000; +module_param(nr_test, int, 0); +MODULE_PARM_DESC(nr_test, "number of iterations to test"); + +static bool test_align; +module_param(test_align, bool, 0); +MODULE_PARM_DESC(test_align, "use align API for testing"); + +static int test_alloc_len = 2048; +module_param(test_alloc_len, int, 0); +MODULE_PARM_DESC(test_alloc_len, "alloc len for testing"); + +static int test_push_cpu; +module_param(test_push_cpu, int, 0); +MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment"); + +static int test_pop_cpu; +module_param(test_pop_cpu, int, 0); +MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment"); + +static int page_frag_pop_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag pop test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_popped < nr_test) { + void *obj = __ptr_ring_consume(ring); + + if (obj) { + test_popped++; + page_frag_free(obj); + } else { + if (force_exit) + break; + + cond_resched(); + } + } + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + pr_info("page_frag pop test thread exits on cpu %d\n", + smp_processor_id()); + + return 0; +} + +static int page_frag_push_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag push test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_pushed < nr_test && !force_exit) { + void *va; + int ret; + + if (test_align) { + va = page_frag_alloc_align(&test_nc, test_alloc_len, + GFP_KERNEL, SMP_CACHE_BYTES); + + if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) { + force_exit = true; + WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n"); + } + } else { + va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL); + } + + if (!va) + continue; + + ret = __ptr_ring_produce(ring, va); + if (ret) { + page_frag_free(va); + cond_resched(); + } else { + test_pushed++; + } + } + + pr_info("page_frag push test thread exits on cpu %d\n", + smp_processor_id()); + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + return 0; +} + +static int __init page_frag_test_init(void) +{ + struct task_struct *tsk_push, *tsk_pop; + int last_pushed = 0, last_popped = 0; + ktime_t start; + u64 duration; + int ret; + + page_frag_cache_init(&test_nc); + atomic_set(&nthreads, 2); + init_completion(&wait); + + if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 || + !cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu)) + return -EINVAL; + + ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL); + if (ret) + return ret; + + tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring, + test_push_cpu, "page_frag_push"); + if (IS_ERR(tsk_push)) + return PTR_ERR(tsk_push); + + tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring, + test_pop_cpu, "page_frag_pop"); + if (IS_ERR(tsk_pop)) { + kthread_stop(tsk_push); + return PTR_ERR(tsk_pop); + } + + start = ktime_get(); + wake_up_process(tsk_push); + wake_up_process(tsk_pop); + + pr_info("waiting for test to complete\n"); + + while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) { + /* exit if there is no progress for push or pop size */ + if (last_pushed == test_pushed || last_popped == test_popped) { + WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n"); + force_exit = true; + continue; + } + + last_pushed = test_pushed; + last_popped = test_popped; + pr_info("page_frag_test progress: pushed = %d, popped = %d\n", + test_pushed, test_popped); + } + + if (force_exit) { + pr_err(TEST_FAILED_PREFIX "exit with error\n"); + goto out; + } + + duration = (u64)ktime_us_delta(ktime_get(), start); + pr_info("%d of iterations for %s testing took: %lluus\n", nr_test, + test_align ? "aligned" : "non-aligned", duration); + +out: + ptr_ring_cleanup(&ptr_ring, NULL); + page_frag_cache_drain(&test_nc); + + return -EAGAIN; +} + +static void __exit page_frag_test_exit(void) +{ +} + +module_init(page_frag_test_init); +module_exit(page_frag_test_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yunsheng Lin <linyunsheng@huawei.com>"); +MODULE_DESCRIPTION("Test module for page_frag"); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index d59517ed3d48..b07acc86f4f0 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,8 +34,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -int page_size; -int hpage_size; +unsigned long page_size; +unsigned int hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -112,7 +112,7 @@ int init_uffd(void) return 0; } -int wp_init(void *lpBaseAddress, int dwRegionSize) +int wp_init(void *lpBaseAddress, long dwRegionSize) { struct uffdio_register uffdio_register; struct uffdio_writeprotect wp; @@ -136,7 +136,7 @@ int wp_init(void *lpBaseAddress, int dwRegionSize) return 0; } -int wp_free(void *lpBaseAddress, int dwRegionSize) +int wp_free(void *lpBaseAddress, long dwRegionSize) { struct uffdio_register uffdio_register; @@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid) int userfaultfd_tests(void) { - int mem_size, vec_size, written, num_pages = 16; + long mem_size, vec_size, written, num_pages = 16; char *mem, *vec; mem_size = num_pages * page_size; @@ -213,7 +213,7 @@ int userfaultfd_tests(void) written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) - ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); @@ -235,7 +235,9 @@ int get_reads(struct page_region *vec, int vec_size) int sanity_tests_sd(void) { - int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0; + unsigned long long mem_size, vec_size, i, total_pages = 0; + long ret, ret2, ret3; + int num_pages = 1000; int total_writes, total_reads, reads, count; struct page_region *vec, *vec2; char *mem, *m[2]; @@ -321,9 +323,9 @@ int sanity_tests_sd(void) ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == mem_size/(page_size * 2), + ksft_test_result((unsigned long long)ret == mem_size/(page_size * 2), "%s Repeated pattern of written and non-written pages\n", __func__); /* 4. Repeated pattern of written and non-written pages in parts */ @@ -331,21 +333,21 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret3 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret3, errno, strerror(errno)); ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2, - "%s Repeated pattern of written and non-written pages in parts %d %d %d\n", + "%s Repeated pattern of written and non-written pages in parts %ld %ld %ld\n", __func__, ret, ret3, ret2); /* 5. Repeated pattern of written and non-written pages max_pages */ @@ -357,13 +359,13 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == num_pages/2 && ret2 == 1, "%s Repeated pattern of written and non-written pages max_pages\n", @@ -378,12 +380,12 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == 1 && LEN(vec[0]) == 2 && vec[0].start == (uintptr_t)(mem + page_size) && @@ -416,7 +418,7 @@ int sanity_tests_sd(void) ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size, "%s Two regions\n", __func__); @@ -448,7 +450,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); for (i = 0; i < mem_size/page_size; i += 2) mem[i * page_size]++; @@ -457,7 +459,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -465,7 +467,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -473,7 +475,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -515,9 +517,9 @@ int sanity_tests_sd(void) vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); - if (ret > vec_size) + if ((unsigned long)ret > vec_size) break; reads = get_reads(vec, ret); @@ -554,63 +556,63 @@ int sanity_tests_sd(void) ret = pagemap_ioc(mem, 0, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end address\n"); ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end with WP\n"); ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end with 0 output buffer\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: Big vec\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: vec of minimum length\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2), "Walk_end: Half max pages\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size), "Walk_end: 1 max page\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: max pages\n"); @@ -621,49 +623,49 @@ int sanity_tests_sd(void) ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Big vec\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end sparse: vec of minimum length\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_endsparse : Half max pages\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end: 1 max page\n"); @@ -674,9 +676,10 @@ int sanity_tests_sd(void) return 0; } -int base_tests(char *prefix, char *mem, int mem_size, int skip) +int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip) { - int vec_size, written; + unsigned long long vec_size; + int written; struct page_region *vec, *vec2; if (skip) { @@ -799,8 +802,8 @@ int hpage_unit_tests(void) char *map; int ret, ret2; size_t num_pages = 10; - int map_size = hpage_size * num_pages; - int vec_size = map_size/page_size; + unsigned long long map_size = hpage_size * num_pages; + unsigned long long vec_size = map_size/page_size; struct page_region *vec, *vec2; vec = malloc(sizeof(struct page_region) * vec_size); @@ -992,7 +995,7 @@ int unmapped_region_tests(void) { void *start = (void *)0x10000000; int written, len = 0x00040000; - int vec_size = len / page_size; + long vec_size = len / page_size; struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); /* 1. Get written pages */ @@ -1047,7 +1050,8 @@ static void test_simple(void) int sanity_tests(void) { - int mem_size, vec_size, ret, fd, i, buf_size; + unsigned long long mem_size, vec_size; + long ret, fd, i, buf_size; struct page_region *vec; char *mem, *fmem; struct stat sbuf; @@ -1156,7 +1160,7 @@ int sanity_tests(void) ret = stat(progname, &sbuf); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) @@ -1312,7 +1316,9 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, { struct pm_scan_arg arg = {0}; struct page_region rgns[256]; - int i, j, cnt, ret; + unsigned long long i, j; + long ret; + int cnt; arg.size = sizeof(struct pm_scan_arg); arg.start = (uintptr_t)mem; @@ -1330,7 +1336,7 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, ksft_exit_fail_msg("ioctl failed\n"); cnt = 0; - for (i = 0; i < ret; ++i) { + for (i = 0; i < (unsigned long)ret; ++i) { if (rgns[i].categories != PAGE_IS_WRITTEN) ksft_exit_fail_msg("wrong flags\n"); @@ -1384,9 +1390,10 @@ void *thread_proc(void *mem) static void transact_test(int page_size) { unsigned int i, count, extra_pages; + unsigned int c; pthread_t th; char *mem; - int ret, c; + int ret; if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1)) ksft_exit_fail_msg("pthread_barrier_init\n"); @@ -1405,9 +1412,9 @@ static void transact_test(int page_size) memset(mem, 0, 0x1000 * nthreads * pages_per_thread); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); - ksft_test_result(count > 0, "%s count %d\n", __func__, count); + ksft_test_result(count > 0, "%s count %u\n", __func__, count); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); - ksft_test_result(count == 0, "%s count %d\n", __func__, count); + ksft_test_result(count == 0, "%s count %u\n", __func__, count); finish = 0; for (i = 0; i < nthreads; ++i) @@ -1429,7 +1436,7 @@ static void transact_test(int page_size) ksft_exit_fail_msg("pthread_barrier_wait\n"); if (count > nthreads * access_per_thread) - ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n", + ksft_exit_fail_msg("Too big count %u expected %u, iter %u\n", count, nthreads * access_per_thread, i); c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); @@ -1454,7 +1461,7 @@ static void transact_test(int page_size) * access and application gets page fault again for the same write. */ if (count < nthreads * access_per_thread) { - ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count, + ksft_test_result_fail("Lost update, iter %u, %u vs %u.\n", i, count, nthreads * access_per_thread); return; } @@ -1467,15 +1474,16 @@ static void transact_test(int page_size) finish = 1; pthread_barrier_wait(&end_barrier); - ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__, + ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %u.\n", __func__, extra_pages, 100.0 * extra_pages / (iter_count * nthreads * access_per_thread), extra_thread_faults); } -int main(int argc, char *argv[]) +int main(int __attribute__((unused)) argc, char *argv[]) { - int mem_size, shmid, buf_size, fd, i, ret; + int shmid, buf_size, fd, i, ret; + unsigned long long mem_size; char *mem, *map, *fmem; struct stat sbuf; @@ -1484,7 +1492,7 @@ int main(int argc, char *argv[]) ksft_print_header(); if (init_uffd()) - return ksft_exit_pass(); + ksft_exit_pass(); ksft_set_plan(115); @@ -1567,8 +1575,10 @@ int main(int argc, char *argv[]) /* 7. File Hugetlb testing */ mem_size = 2*1024*1024; fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL); + if (fd < 0) + ksft_exit_fail_msg("uffd-test creation failed %d %s\n", errno, strerror(errno)); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (mem) { + if (mem != MAP_FAILED) { wp_init(mem, mem_size); wp_addr_range(mem, mem_size); @@ -1660,5 +1670,5 @@ int main(int argc, char *argv[]) userfaultfd_tests(); close(pagemap_fd); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 000000000000..866ac023baf5 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * + * Copyright 2025, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "vm_util.h" + +static sigjmp_buf sigjmp_buf_env; + +static void signal_handler(int sig) +{ + siglongjmp(sigjmp_buf_env, -EFAULT); +} + +static int test_read_access(char *addr, size_t size, size_t pagesize) +{ + size_t offs; + int ret; + + if (signal(SIGSEGV, signal_handler) == SIG_ERR) + return -EINVAL; + + ret = sigsetjmp(sigjmp_buf_env, 1); + if (!ret) { + for (offs = 0; offs < size; offs += pagesize) + /* Force a read that the compiler cannot optimize out. */ + *((volatile char *)(addr + offs)); + } + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) + return -EINVAL; + + return ret; +} + +static int find_ram_target(off_t *phys_addr, + unsigned long long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file; + + /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno; + + while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue; + + if (!strstr(line, "System RAM\n")) + continue; + + start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1; + + /* Actual addresses are not exported */ + if (!start && !end) + break; + + /* We need full pages. */ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1); + + if (start != (off_t)start) + break; + + /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +} + +FIXTURE(pfnmap) +{ + off_t phys_addr; + size_t pagesize; + int dev_mem_fd; + char *addr1; + size_t size1; + char *addr2; + size_t size2; +}; + +FIXTURE_SETUP(pfnmap) +{ + self->pagesize = getpagesize(); + + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + + self->dev_mem_fd = open("/dev/mem", O_RDONLY); + if (self->dev_mem_fd < 0) + SKIP(return, "Cannot open '/dev/mem'\n"); + + self->size1 = self->pagesize * 2; + self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, + self->dev_mem_fd, self->phys_addr); + if (self->addr1 == MAP_FAILED) + SKIP(return, "Cannot mmap '/dev/mem'\n"); + + /* ... and want to be able to read from them. */ + if (test_read_access(self->addr1, self->size1, self->pagesize)) + SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + + self->size2 = 0; + self->addr2 = MAP_FAILED; +} + +FIXTURE_TEARDOWN(pfnmap) +{ + if (self->addr2 != MAP_FAILED) + munmap(self->addr2, self->size2); + if (self->addr1 != MAP_FAILED) + munmap(self->addr1, self->size1); + if (self->dev_mem_fd >= 0) + close(self->dev_mem_fd); +} + +TEST_F(pfnmap, madvise_disallowed) +{ + int advices[] = { + MADV_DONTNEED, + MADV_DONTNEED_LOCKED, + MADV_FREE, + MADV_WIPEONFORK, + MADV_COLD, + MADV_PAGEOUT, + MADV_POPULATE_READ, + MADV_POPULATE_WRITE, + }; + int i; + + /* All these advices must be rejected. */ + for (i = 0; i < ARRAY_SIZE(advices); i++) { + EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(pfnmap, munmap_split) +{ + /* + * Unmap the first page. This munmap() call is not really expected to + * fail, but we might be able to trigger other internal issues. + */ + ASSERT_EQ(munmap(self->addr1, self->pagesize), 0); + + /* + * Remap the first page while the second page is still mapped. This + * makes sure that any PAT tracking on x86 will allow for mmap()'ing + * a page again while some parts of the first mmap() are still + * around. + */ + self->size2 = self->pagesize; + self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, + self->dev_mem_fd, self->phys_addr); + ASSERT_NE(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_fixed) +{ + char *ret; + + /* Reserve a destination area. */ + self->size2 = self->size1; + self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(self->addr2, MAP_FAILED); + + /* mremap() over our destination. */ + ret = mremap(self->addr1, self->size1, self->size2, + MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_shrink) +{ + char *ret; + + /* Shrinking is expected to work. */ + ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_expand) +{ + /* + * Growing is not expected to work, and getting it right would + * be challenging. So this test primarily serves as an early warning + * that something that probably should never work suddenly works. + */ + self->size2 = self->size1 + self->pagesize; + self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE); + ASSERT_EQ(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, fork) +{ + pid_t pid; + int ret; + + /* fork() a child and test if the child can access the pages. */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + EXPECT_EQ(test_read_access(self->addr1, self->size1, + self->pagesize), 0); + exit(0); + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h new file mode 100644 index 000000000000..8e9685e03c44 --- /dev/null +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + */ + +#ifndef _PKEYS_ARM64_H +#define _PKEYS_ARM64_H + +#include "vm_util.h" +/* for signal frame parsing */ +#include "../arm64/signal/testcases/testcases.h" + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 288 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 289 +# define SYS_pkey_free 290 +#endif +#define MCONTEXT_IP(mc) mc.pc +#define MCONTEXT_TRAPNO(mc) -1 + +#define PKEY_MASK 0xf + +#define POE_NONE 0x0 +#define POE_X 0x2 +#define POE_RX 0x3 +#define POE_RWX 0x7 + +#define NR_PKEYS 8 +#define NR_RESERVED_PKEYS 1 /* pkey-0 */ + +#define PKEY_REG_ALLOW_ALL 0x77777777 +#define PKEY_REG_ALLOW_NONE 0x0 + +#define PKEY_BITS_PER_PKEY 4 +#define PAGE_SIZE sysconf(_SC_PAGESIZE) +#undef HPAGE_SIZE +#define HPAGE_SIZE default_huge_page_size() + +/* 4-byte instructions * 16384 = 64K page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +static inline u64 __read_pkey_reg(void) +{ + u64 pkey_reg = 0; + + // POR_EL0 + asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg)); + + return pkey_reg; +} + +static inline void __write_pkey_reg(u64 pkey_reg) +{ + u64 por = pkey_reg; + + dprintf4("%s() changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); + + // POR_EL0 + asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :); + + dprintf4("%s() pkey register after changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); +} + +static inline int cpu_has_pkeys(void) +{ + /* No simple way to determine this */ + return 1; +} + +static inline u32 pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static inline int get_arch_reserved_keys(void) +{ + return NR_RESERVED_PKEYS; +} + +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ +} + +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + return PTR_ERR_ENOTSUP; +} + +#define set_pkey_bits set_pkey_bits +static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + u32 shift = pkey_bit_position(pkey); + u64 new_val = POE_RWX; + + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + + if (flags & PKEY_DISABLE_ACCESS) + new_val = POE_X; + else if (flags & PKEY_DISABLE_WRITE) + new_val = POE_RX; + + /* OR in new bits for pkey */ + reg |= new_val << shift; + + return reg; +} + +#define get_pkey_bits get_pkey_bits +static inline u64 get_pkey_bits(u64 reg, int pkey) +{ + u32 shift = pkey_bit_position(pkey); + /* + * shift down the relevant bits to the lowest four, then + * mask off all the other higher bits + */ + u32 perm = (reg >> shift) & PKEY_MASK; + + if (perm == POE_X) + return PKEY_DISABLE_ACCESS; + if (perm == POE_RX) + return PKEY_DISABLE_WRITE; + return 0; +} + +static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +{ + struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); + struct poe_context *poe_ctx = + (struct poe_context *) get_header(ctx, POE_MAGIC, + sizeof(uctxt->uc_mcontext), NULL); + if (poe_ctx) + poe_ctx->por_el0 = pkey; +} + +#endif /* _PKEYS_ARM64_H */ diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 1af3156a9db8..ea404f80e6cb 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -13,22 +13,23 @@ #include <ucontext.h> #include <sys/mman.h> +#include <linux/mman.h> +#include <linux/types.h> + #include "../kselftest.h" /* Define some kernel-like types */ -#define u8 __u8 -#define u16 __u16 -#define u32 __u32 -#define u64 __u64 +typedef __u8 u8; +typedef __u16 u16; +typedef __u32 u32; +typedef __u64 u64; #define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) #ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 #endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; extern int test_nr; extern int iteration_nr; @@ -79,10 +80,22 @@ extern void abort_hooks(void); } \ } while (0) -__attribute__((noinline)) int read_ptr(int *ptr); -void expected_pkey_fault(int pkey); +#define barrier() __asm__ __volatile__("": : :"memory") +#ifndef noinline +# define noinline __attribute__((noinline)) +#endif +#ifndef __maybe_unused +# define __maybe_unused __attribute__((__unused__)) +#endif + int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey); + +/* For functions called from protection_keys.c only */ +noinline int read_ptr(int *ptr); +void expected_pkey_fault(int pkey); int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey); void record_pkey_malloc(void *ptr, long size, int prot); @@ -91,12 +104,24 @@ void record_pkey_malloc(void *ptr, long size, int prot); #include "pkey-x86.h" #elif defined(__powerpc64__) /* arch */ #include "pkey-powerpc.h" +#elif defined(__aarch64__) /* arch */ +#include "pkey-arm64.h" #else /* arch */ #error Architecture not supported #endif /* arch */ +#ifndef PKEY_MASK #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif +/* + * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged. + */ +#ifndef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 +#endif + +#ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { u32 shift = pkey_bit_position(pkey); @@ -106,7 +131,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) reg |= (flags & PKEY_MASK) << shift; return reg; } +#endif +#ifndef get_pkey_bits static inline u64 get_pkey_bits(u64 reg, int pkey) { u32 shift = pkey_bit_position(pkey); @@ -116,6 +143,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) */ return ((reg >> shift) & PKEY_MASK); } +#endif extern u64 shadow_pkey_reg; @@ -145,38 +173,6 @@ static inline void write_pkey_reg(u64 pkey_reg) pkey_reg, __read_pkey_reg()); } -/* - * These are technically racy. since something could - * change PKEY register between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - u64 pkey_reg = read_pkey_reg(); - int bit = pkey * 2; - - if (do_allow) - pkey_reg &= (1<<bit); - else - pkey_reg |= (1<<bit); - - dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); - write_pkey_reg(pkey_reg); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - u64 pkey_reg = read_pkey_reg(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkey_reg &= (1<<bit); - else - pkey_reg |= (1<<bit); - - write_pkey_reg(pkey_reg); - dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); -} - #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) \ @@ -198,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) static inline int kernel_has_pkeys(void) { /* try allocating a key and see if it succeeds */ - int ret = sys_pkey_alloc(0, 0); + int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (ret <= 0) { return 0; } diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index ae5df26104e5..17bf2d1b0192 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,12 +3,17 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H +#include <sys/stat.h> + #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 #endif #define REG_IP_IDX PT_NIP +#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO] #define REG_TRAPNO PT_TRAP +#define MCONTEXT_FPREGS #define gregs gp_regs #define fpregs fp_regs #define si_pkey_offset 0x20 @@ -88,7 +93,7 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS_64K_3KEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { /* * powerpc does not allow userspace to change permissions of exec-only @@ -99,10 +104,20 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { void *ptr; int ret; diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 814758e109c0..f7ecd335df1e 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -15,6 +15,10 @@ #endif +#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO] +#define MCONTEXT_FPREGS + #ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 #endif @@ -30,6 +34,8 @@ #define PAGE_SIZE 4096 #define MB (1<<20) +#define PKEY_REG_ALLOW_NONE 0x55555555 + static inline void __page_o_noops(void) { /* 8-bytes of instruction * 512 bytes = 1 page */ @@ -107,7 +113,7 @@ static inline u32 pkey_bit_position(int pkey) #define XSTATE_PKEY 0x200 #define XSTATE_BV_OFFSET 512 -int pkey_reg_xstate_offset(void) +static inline int pkey_reg_xstate_offset(void) { unsigned int eax; unsigned int ebx; @@ -142,7 +148,7 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { int ptr_contents; @@ -151,7 +157,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) expected_pkey_fault(pkey); } -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { return PTR_ERR_ENOTSUP; } diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c new file mode 100644 index 000000000000..b5e076a564c9 --- /dev/null +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) + * + * The testcases in this file exercise various flows related to signal handling, + * using an alternate signal stack, with the default pkey (pkey 0) disabled. + * + * Compile with: + * gcc -mxsave -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm + * gcc -mxsave -m32 -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm + */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <linux/mman.h> +#include <errno.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <pthread.h> +#include <limits.h> + +#include "pkey-helpers.h" + +#define STACK_SIZE PTHREAD_STACK_MIN + +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +static siginfo_t siginfo = {0}; + +/* + * We need to use inline assembly instead of glibc's syscall because glibc's + * syscall will attempt to access the PLT in order to call a library function + * which is protected by MPK 0 which we don't have access to. + */ +static inline __always_inline +long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) +{ + unsigned long ret; +#ifdef __x86_64__ + register long r10 asm("r10") = a4; + register long r8 asm("r8") = a5; + register long r9 asm("r9") = a6; + asm volatile ("syscall" + : "=a"(ret) + : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9) + : "rcx", "r11", "memory"); +#elif defined __i386__ + asm volatile ("int $0x80" + : "=a"(ret) + : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) + : "memory"); +#elif defined __aarch64__ + register long x0 asm("x0") = a1; + register long x1 asm("x1") = a2; + register long x2 asm("x2") = a3; + register long x3 asm("x3") = a4; + register long x4 asm("x4") = a5; + register long x5 asm("x5") = a6; + register long x8 asm("x8") = n; + asm volatile ("svc #0" + : "=r"(x0) + : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8) + : "memory"); + ret = x0; +#else +# error syscall_raw() not implemented +#endif + return ret; +} + +static inline long clone_raw(unsigned long flags, void *stack, + int *parent_tid, int *child_tid) +{ + long a1 = flags; + long a2 = (long)stack; + long a3 = (long)parent_tid; +#if defined(__x86_64__) || defined(__i386) + long a4 = (long)child_tid; + long a5 = 0; +#elif defined(__aarch64__) + long a4 = 0; + long a5 = (long)child_tid; +#else +# error clone_raw() not implemented +#endif + + return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0); +} + +/* + * Returns the most restrictive pkey register value that can be used by the + * tests. + */ +static inline u64 pkey_reg_restrictive_default(void) +{ + /* + * Disallow everything except execution on pkey 0, so that each caller + * doesn't need to enable it explicitly (the selftest code runs with + * its code mapped with pkey 0). + */ + return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS); +} + +static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext) +{ + pthread_mutex_lock(&mutex); + + memcpy(&siginfo, info, sizeof(siginfo_t)); + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); +} + +static void sigusr1_handler(int signo, siginfo_t *info, void *ucontext) +{ + pthread_mutex_lock(&mutex); + + memcpy(&siginfo, info, sizeof(siginfo_t)); + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +} + +static void sigusr2_handler(int signo, siginfo_t *info, void *ucontext) +{ + /* + * pkru should be the init_pkru value which enabled MPK 0 so + * we can use library functions. + */ + printf("%s invoked.\n", __func__); +} + +static void raise_sigusr2(void) +{ + pid_t tid = 0; + + tid = syscall_raw(SYS_gettid, 0, 0, 0, 0, 0, 0); + + syscall_raw(SYS_tkill, tid, SIGUSR2, 0, 0, 0, 0); + + /* + * We should return from the signal handler here and be able to + * return to the interrupted thread. + */ +} + +static void *thread_segv_with_pkey0_disabled(void *ptr) +{ + /* Disable MPK 0 (and all others too) */ + __write_pkey_reg(pkey_reg_restrictive_default()); + + /* Segfault (with SEGV_MAPERR) */ + *(volatile int *)NULL = 1; + return NULL; +} + +static void *thread_segv_pkuerr_stack(void *ptr) +{ + /* Disable MPK 0 (and all others too) */ + __write_pkey_reg(pkey_reg_restrictive_default()); + + /* After we disable MPK 0, we can't access the stack to return */ + return NULL; +} + +static void *thread_segv_maperr_ptr(void *ptr) +{ + stack_t *stack = ptr; + u64 pkey_reg; + + /* + * Setup alternate signal stack, which should be pkey_mprotect()ed by + * MPK 0. The thread's stack cannot be used for signals because it is + * not accessible by the default init_pkru value of 0x55555554. + */ + syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); + + /* Disable MPK 0. Only MPK 1 is enabled. */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); + + /* Segfault */ + *(volatile int *)NULL = 1; + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + return NULL; +} + +/* + * Verify that the sigsegv handler is invoked when pkey 0 is disabled. + * Note that the new thread stack and the alternate signal stack is + * protected by MPK 0. + */ +static void test_sigsegv_handler_with_pkey0_disabled(void) +{ + struct sigaction sa; + pthread_attr_t attr; + pthread_t thr; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&thr, &attr, thread_segv_with_pkey0_disabled, NULL); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_MAPERR && + siginfo.si_addr == NULL, + "%s\n", __func__); +} + +/* + * Verify that the sigsegv handler is invoked when pkey 0 is disabled. + * Note that the new thread stack and the alternate signal stack is + * protected by MPK 0, which renders them inaccessible when MPK 0 + * is disabled. So just the return from the thread should cause a + * segfault with SEGV_PKUERR. + */ +static void test_sigsegv_handler_cannot_access_stack(void) +{ + struct sigaction sa; + pthread_attr_t attr; + pthread_t thr; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&thr, &attr, thread_segv_pkuerr_stack, NULL); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_PKUERR, + "%s\n", __func__); +} + +/* + * Verify that the sigsegv handler that uses an alternate signal stack + * is correctly invoked for a thread which uses a non-zero MPK to protect + * its own stack, and disables all other MPKs (including 0). + */ +static void test_sigsegv_handler_with_different_pkey_for_stack(void) +{ + struct sigaction sa; + static stack_t sigstack; + void *stack; + int pkey; + int parent_pid = 0; + int child_pid = 0; + u64 pkey_reg; + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + + sa.sa_sigaction = sigsegv_handler; + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(stack != MAP_FAILED); + + /* Allow access to MPK 0 and MPK 1 */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); + + /* Protect the new stack with MPK 1 */ + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); + sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + + /* Set up alternate signal stack that will use the default MPK */ + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack.ss_flags = 0; + sigstack.ss_size = STACK_SIZE; + + memset(&siginfo, 0, sizeof(siginfo)); + + /* Use clone to avoid newer glibcs using rseq on new threads */ + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); + + if (ret < 0) { + errno = -ret; + perror("clone"); + } else if (ret == 0) { + thread_segv_maperr_ptr(&sigstack); + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + } + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_MAPERR && + siginfo.si_addr == NULL, + "%s\n", __func__); +} + +/* + * Verify that the PKRU value set by the application is correctly + * restored upon return from signal handling. + */ +static void test_pkru_preserved_after_sigusr1(void) +{ + struct sigaction sa; + u64 pkey_reg; + + /* Allow access to MPK 0 and an arbitrary set of keys */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED); + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigusr1_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + __write_pkey_reg(pkey_reg); + + raise(SIGUSR1); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + /* Ensure the pkru value is the same after returning from signal. */ + ksft_test_result(pkey_reg == __read_pkey_reg() && + siginfo.si_signo == SIGUSR1, + "%s\n", __func__); +} + +static noinline void *thread_sigusr2_self(void *ptr) +{ + /* + * A const char array like "Resuming after SIGUSR2" won't be stored on + * the stack and the code could access it via an offset from the program + * counter. This makes sure it's on the function's stack frame. + */ + char str[] = {'R', 'e', 's', 'u', 'm', 'i', 'n', 'g', ' ', + 'a', 'f', 't', 'e', 'r', ' ', + 'S', 'I', 'G', 'U', 'S', 'R', '2', + '.', '.', '.', '\n', '\0'}; + stack_t *stack = ptr; + u64 pkey_reg; + + /* + * Setup alternate signal stack, which should be pkey_mprotect()ed by + * MPK 0. The thread's stack cannot be used for signals because it is + * not accessible by the default init_pkru value of 0x55555554. + */ + syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); + + /* Disable MPK 0. Only MPK 2 is enabled. */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); + + raise_sigusr2(); + + /* Do something, to show the thread resumed execution after the signal */ + syscall_raw(SYS_write, 1, (long) str, sizeof(str) - 1, 0, 0, 0); + + /* + * We can't return to test_pkru_sigreturn because it + * will attempt to use a %rbp value which is on the stack + * of the main thread. + */ + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + return NULL; +} + +/* + * Verify that sigreturn is able to restore altstack even if the thread had + * disabled pkey 0. + */ +static void test_pkru_sigreturn(void) +{ + struct sigaction sa = {0}; + static stack_t sigstack; + void *stack; + int pkey; + int parent_pid = 0; + int child_pid = 0; + u64 pkey_reg; + + sa.sa_handler = SIG_DFL; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + + /* + * For this testcase, we do not want to handle SIGSEGV. Reset handler + * to default so that the application can crash if it receives SIGSEGV. + */ + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sa.sa_sigaction = sigusr2_handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGUSR2, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(stack != MAP_FAILED); + + /* + * Allow access to MPK 0 and MPK 2. The child thread (to be created + * later in this flow) will have its stack protected by MPK 2, whereas + * the current thread's stack is protected by the default MPK 0. Hence + * both need to be enabled. + */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); + + /* Protect the stack with MPK 2 */ + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); + sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + + /* Set up alternate signal stack that will use the default MPK */ + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack.ss_flags = 0; + sigstack.ss_size = STACK_SIZE; + + /* Use clone to avoid newer glibcs using rseq on new threads */ + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); + + if (ret < 0) { + errno = -ret; + perror("clone"); + } else if (ret == 0) { + thread_sigusr2_self(&sigstack); + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + } + + child_pid = ret; + /* Check that thread exited */ + do { + sched_yield(); + ret = syscall_raw(SYS_tkill, child_pid, 0, 0, 0, 0, 0); + } while (ret != -ESRCH && ret != -EINVAL); + + ksft_test_result_pass("%s\n", __func__); +} + +static void (*pkey_tests[])(void) = { + test_sigsegv_handler_with_pkey0_disabled, + test_sigsegv_handler_cannot_access_stack, + test_sigsegv_handler_with_different_pkey_for_stack, + test_pkru_preserved_after_sigusr1, + test_pkru_sigreturn +}; + +int main(int argc, char *argv[]) +{ + int i; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(pkey_tests)); + + if (!is_pkeys_supported()) + ksft_exit_skip("pkeys not supported\n"); + + for (i = 0; i < ARRAY_SIZE(pkey_tests); i++) + (*pkey_tests[i])(); + + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c new file mode 100644 index 000000000000..255b332f7a08 --- /dev/null +++ b/tools/testing/selftests/mm/pkey_util.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define __SANE_USERSPACE_TYPES__ +#include <sys/syscall.h> +#include <unistd.h> + +#include "pkey-helpers.h" + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return sret; +} diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 374a308174d2..23ebec367015 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -53,10 +53,15 @@ int test_nr; u64 shadow_pkey_reg; int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -char buf[256]; -void cat_into_file(char *str, char *file) +noinline int read_ptr(int *ptr) +{ + /* Keep GCC from optimizing this away somehow */ + barrier(); + return *ptr; +} + +static void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); int ret; @@ -83,7 +88,7 @@ void cat_into_file(char *str, char *file) #if CONTROL_TRACING > 0 static int warned_tracing; -int tracing_root_ok(void) +static int tracing_root_ok(void) { if (geteuid() != 0) { if (!warned_tracing) @@ -96,7 +101,7 @@ int tracing_root_ok(void) } #endif -void tracing_on(void) +static void tracing_on(void) { #if CONTROL_TRACING > 0 #define TRACEDIR "/sys/kernel/tracing" @@ -120,7 +125,7 @@ void tracing_on(void) #endif } -void tracing_off(void) +static void tracing_off(void) { #if CONTROL_TRACING > 0 if (!tracing_root_ok()) @@ -148,13 +153,13 @@ void abort_hooks(void) * will then fault, which makes sure that the fault code handles * execute-only memory properly. */ -#ifdef __powerpc64__ +#if defined(__powerpc64__) || defined(__aarch64__) /* This way, both 4K and 64K alignment are maintained */ __attribute__((__aligned__(65536))) #else __attribute__((__aligned__(PAGE_SIZE))) #endif -void lots_o_noops_around_write(int *write_to_me) +static void lots_o_noops_around_write(int *write_to_me) { dprintf3("running %s()\n", __func__); __page_o_noops(); @@ -165,7 +170,7 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); } -void dump_mem(void *dumpme, int len_bytes) +static void dump_mem(void *dumpme, int len_bytes) { char *c = (void *)dumpme; int i; @@ -208,12 +213,11 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) return 0; } -void pkey_disable_set(int pkey, int flags) +static void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; int pkey_rights; - u64 orig_pkey_reg = read_pkey_reg(); dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -243,18 +247,15 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - pkey_assert(read_pkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } -void pkey_disable_clear(int pkey, int flags) +static void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u64 orig_pkey_reg = read_pkey_reg(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); @@ -274,23 +275,21 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - assert(read_pkey_reg() <= orig_pkey_reg); } -void pkey_write_allow(int pkey) +__maybe_unused static void pkey_write_allow(int pkey) { pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); } -void pkey_write_deny(int pkey) +__maybe_unused static void pkey_write_deny(int pkey) { pkey_disable_set(pkey, PKEY_DISABLE_WRITE); } -void pkey_access_allow(int pkey) +__maybe_unused static void pkey_access_allow(int pkey) { pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); } -void pkey_access_deny(int pkey) +__maybe_unused static void pkey_access_deny(int pkey) { pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); } @@ -308,14 +307,16 @@ static char *si_code_str(int si_code) return "UNKNOWN"; } -int pkey_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) +static int pkey_faults; +static int last_si_pkey = -1; +static void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; +#ifdef MCONTEXT_FPREGS char *fpregs; +#endif #if defined(__i386__) || defined(__x86_64__) /* arch */ u32 *pkey_reg_ptr; int pkey_reg_offset; @@ -329,9 +330,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext); + ip = MCONTEXT_IP(uctxt->uc_mcontext); +#ifdef MCONTEXT_FPREGS fpregs = (char *) uctxt->uc_mcontext.fpregs; +#endif dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), @@ -360,7 +363,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #endif /* arch */ dprintf1("siginfo: %p\n", si); +#ifdef MCONTEXT_FPREGS dprintf1(" fpregs: %p\n", fpregs); +#endif if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -390,26 +395,22 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #elif defined(__powerpc64__) /* arch */ /* restore access and let the faulting instruction continue */ pkey_access_allow(siginfo_pkey); +#elif defined(__aarch64__) + aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL); #endif /* arch */ pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0; } -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) +static void sig_chld(int x) { dprint_in_signal = 1; dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); dprint_in_signal = 0; } -void setup_sigsegv_handler(void) +static void setup_sigsegv_handler(void) { int r, rs; struct sigaction newact; @@ -435,13 +436,13 @@ void setup_sigsegv_handler(void) pkey_assert(r == 0); } -void setup_handlers(void) +static void setup_handlers(void) { signal(SIGCHLD, &sig_chld); setup_sigsegv_handler(); } -pid_t fork_lazy_child(void) +static pid_t fork_lazy_child(void) { pid_t forkret; @@ -459,38 +460,10 @@ pid_t fork_lazy_child(void) return forkret; } -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) +static int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + unsigned long init_val = PKEY_UNRESTRICTED; dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); @@ -533,19 +506,12 @@ int alloc_pkey(void) return ret; } -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - /* * I had a bug where pkey bits could be set by mprotect() but * not cleared. This ensures we get lots of random bit sets * and clears on the vma and pte pkey bits. */ -int alloc_random_pkey(void) +static int alloc_random_pkey(void) { int max_nr_pkey_allocs; int ret; @@ -628,7 +594,7 @@ struct pkey_malloc_record { }; struct pkey_malloc_record *pkey_malloc_records; struct pkey_malloc_record *pkey_last_malloc_record; -long nr_pkey_malloc_records; +static long nr_pkey_malloc_records; void record_pkey_malloc(void *ptr, long size, int prot) { long i; @@ -666,7 +632,7 @@ void record_pkey_malloc(void *ptr, long size, int prot) nr_pkey_malloc_records++; } -void free_pkey_malloc(void *ptr) +static void free_pkey_malloc(void *ptr) { long i; int ret; @@ -693,8 +659,7 @@ void free_pkey_malloc(void *ptr) pkey_assert(false); } - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) { void *ptr; int ret; @@ -714,7 +679,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) return ptr; } -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) { int ret; void *ptr; @@ -744,10 +709,10 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) return ptr; } -int hugetlb_setup_ok; +static int hugetlb_setup_ok; #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" #define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) +static void setup_hugetlbfs(void) { int err; int fd; @@ -795,7 +760,7 @@ void setup_hugetlbfs(void) hugetlb_setup_ok = 1; } -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) { void *ptr; int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; @@ -816,42 +781,15 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) return ptr; } -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { +static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { malloc_pkey_with_mprotect, malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ }; -void *malloc_pkey(long size, int prot, u16 pkey) +static void *malloc_pkey(long size, int prot, u16 pkey) { void *ret; static int malloc_type; @@ -881,7 +819,7 @@ void *malloc_pkey(long size, int prot, u16 pkey) return ret; } -int last_pkey_faults; +static int last_pkey_faults; #define UNKNOWN_PKEY -2 void expected_pkey_fault(int pkey) { @@ -903,7 +841,9 @@ void expected_pkey_fault(int pkey) * test program continue. We now have to restore it. */ if (__read_pkey_reg() != 0) -#else /* arch */ +#elif defined(__aarch64__) + if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL) +#else if (__read_pkey_reg() != shadow_pkey_reg) #endif /* arch */ pkey_assert(0); @@ -921,9 +861,9 @@ void expected_pkey_fault(int pkey) pkey_assert(last_pkey_faults == pkey_faults); \ } while (0) -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) +static int test_fds[10] = { -1 }; +static int nr_test_fds; +static void __save_test_fd(int fd) { pkey_assert(fd >= 0); pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); @@ -931,14 +871,14 @@ void __save_test_fd(int fd) nr_test_fds++; } -int get_test_read_fd(void) +static int get_test_read_fd(void) { int test_fd = open("/etc/passwd", O_RDONLY); __save_test_fd(test_fd); return test_fd; } -void close_test_fds(void) +static void close_test_fds(void) { int i; @@ -951,17 +891,7 @@ void close_test_fds(void) nr_test_fds = 0; } -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - -void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) +static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) { int i, err; int max_nr_pkey_allocs; @@ -1013,7 +943,7 @@ void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) pkey_assert(!err); } -void test_read_of_write_disabled_region(int *ptr, u16 pkey) +static void test_read_of_write_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1023,7 +953,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey) dprintf1("*ptr: %d\n", ptr_contents); dprintf1("\n"); } -void test_read_of_access_disabled_region(int *ptr, u16 pkey) +static void test_read_of_access_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1035,7 +965,7 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) expected_pkey_fault(pkey); } -void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, +static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { int ptr_contents; @@ -1052,7 +982,7 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, +static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { *ptr = __LINE__; @@ -1063,14 +993,14 @@ void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_write_of_write_disabled_region(int *ptr, u16 pkey) +static void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; expected_pkey_fault(pkey); } -void test_write_of_access_disabled_region(int *ptr, u16 pkey) +static void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); @@ -1078,7 +1008,7 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey) expected_pkey_fault(pkey); } -void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, +static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { *ptr = __LINE__; @@ -1089,7 +1019,7 @@ void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; int test_fd = get_test_read_fd(); @@ -1101,7 +1031,8 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) dprintf1("read ret: %d\n", ret); pkey_assert(ret); } -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) + +static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) { int ret; int test_fd = get_test_read_fd(); @@ -1114,7 +1045,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) pkey_assert(ret); } -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +static void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) { int pipe_ret, vmsplice_ret; struct iovec iov; @@ -1136,7 +1067,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) close(pipe_fds[1]); } -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) { int ignored = 0xdada; int futex_ret; @@ -1154,7 +1085,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; int i; @@ -1177,7 +1108,7 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) { int err; int bad_pkey = NR_PKEYS+99; @@ -1187,7 +1118,7 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } -void become_child(void) +static void become_child(void) { pid_t forkret; @@ -1203,7 +1134,7 @@ void become_child(void) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +static void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { int err; int allocated_pkeys[NR_PKEYS] = {0}; @@ -1270,7 +1201,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } -void arch_force_pkey_reg_init(void) +static void arch_force_pkey_reg_init(void) { #if defined(__i386__) || defined(__x86_64__) /* arch */ u64 *buf; @@ -1309,7 +1240,7 @@ void arch_force_pkey_reg_init(void) * a long-running test that continually checks the pkey * register. */ -void test_pkey_init_state(int *ptr, u16 pkey) +static void test_pkey_init_state(int *ptr, u16 pkey) { int err; int allocated_pkeys[NR_PKEYS] = {0}; @@ -1347,7 +1278,7 @@ void test_pkey_init_state(int *ptr, u16 pkey) * have to call pkey_alloc() to use it first. Make sure that it * is usable. */ -void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +static void test_mprotect_with_pkey_0(int *ptr, u16 pkey) { long size; int prot; @@ -1371,7 +1302,7 @@ void test_mprotect_with_pkey_0(int *ptr, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); } -void test_ptrace_of_child(int *ptr, u16 pkey) +static void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; pid_t child_pid; @@ -1447,7 +1378,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void *get_pointer_to_instructions(void) +static void *get_pointer_to_instructions(void) { void *p1; @@ -1468,7 +1399,7 @@ void *get_pointer_to_instructions(void) return p1; } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +static void test_executing_on_unreadable_memory(int *ptr, u16 pkey) { void *p1; int scratch; @@ -1493,9 +1424,14 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) lots_o_noops_around_write(&scratch); do_not_expect_pkey_fault("executing on PROT_EXEC memory"); expect_fault_on_read_execonly_key(p1, pkey); + + // Reset back to PROT_EXEC | PROT_READ for architectures that support + // non-PKEY execute-only permissions. + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey); + pkey_assert(!ret); } -void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) { void *p1; int scratch; @@ -1544,7 +1480,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) } #if defined(__i386__) || defined(__x86_64__) -void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +static void test_ptrace_modifies_pkru(int *ptr, u16 pkey) { u32 new_pkru; pid_t child; @@ -1666,7 +1602,85 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) } #endif -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +#if defined(__aarch64__) +static void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + pid_t child; + int status, ret; + struct iovec iov; + u64 trace_pkey; + /* Just a random pkey value.. */ + u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) | + (POE_NONE << PKEY_BITS_PER_PKEY) | + POE_RWX; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkey) + exit(1); + + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + iov.iov_base = &trace_pkey; + iov.iov_len = 8; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == read_pkey_reg()); + + trace_pkey = new_pkey; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); +} +#endif + +static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; int sret; @@ -1680,7 +1694,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) pkey_assert(sret < 0); } -void (*pkey_tests[])(int *ptr, u16 pkey) = { +static void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, test_read_of_access_disabled_region_with_page_already_mapped, @@ -1701,12 +1715,12 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) test_ptrace_modifies_pkru, #endif }; -void run_tests_once(void) +static void run_tests_once(void) { int *ptr; int prot = PROT_READ|PROT_WRITE; @@ -1740,47 +1754,11 @@ void run_tests_once(void) iteration_nr++; } -void pkey_setup_shadow(void) +static void pkey_setup_shadow(void) { shadow_pkey_reg = __read_pkey_reg(); } -pid_t parent_pid; - -void restore_settings_atexit(void) -{ - if (parent_pid == getpid()) - cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); -} - -void save_settings(void) -{ - int fd; - int err; - - if (geteuid()) - return; - - fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY); - if (fd < 0) { - fprintf(stderr, "error opening\n"); - perror("error: "); - exit(__LINE__); - } - - /* -1 to guarantee leaving the trailing \0 */ - err = read(fd, buf, sizeof(buf)-1); - if (err < 0) { - fprintf(stderr, "error reading\n"); - perror("error: "); - exit(__LINE__); - } - - parent_pid = getpid(); - atexit(restore_settings_atexit); - close(fd); -} - int main(void) { int nr_iterations = 22; @@ -1788,7 +1766,6 @@ int main(void) srand((unsigned int)time(NULL)); - save_settings(); setup_handlers(); printf("has pkeys: %d\n", pkeys_supported); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c2c542fe7b17..dddd1dd8af14 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -45,6 +45,8 @@ separated by spaces: vmalloc smoke tests - hmm hmm smoke tests +- madv_guard + test madvise(2) MADV_GUARD_INSTALL and MADV_GUARD_REMOVE options - madv_populate test memadvise(2) MADV_POPULATE_{READ,WRITE} options - memfd_secret @@ -61,6 +63,8 @@ separated by spaces: test soft dirty page bit semantics - pagemap test pagemap_scan IOCTL +- pfnmap + tests for VM_PFNMAP handling - cow test copy-on-write semantics - thp @@ -75,6 +79,10 @@ separated by spaces: read-only VMAs - mdwe test prctl(PR_SET_MDWE, ...) +- page_frag + test handling of page fragment allocation and freeing +- vma_merge + test VMA merge cases behave as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -152,9 +160,13 @@ done < /proc/meminfo # both of these requirements into account and attempt to increase # number of huge pages available. nr_cpus=$(nproc) -hpgsize_MB=$((hpgsize_KB / 1024)) -half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) -needmem_KB=$((half_ufd_size_MB * 2 * 1024)) +uffd_min_KB=$((hpgsize_KB * nr_cpus * 2)) +hugetlb_min_KB=$((256 * 1024)) +if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then + needmem_KB=$uffd_min_KB +else + needmem_KB=$hugetlb_min_KB +fi # set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then @@ -179,13 +191,14 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" fi + HAVE_HUGEPAGES=1 else echo "no hugetlbfs support in kernel?" - exit 1 + HAVE_HUGEPAGES=0 fi # filter 64bit architectures -ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64" +ARCH64STR="arm64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64" if [ -z "$ARCH" ]; then ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi @@ -210,13 +223,20 @@ pretty_name() { # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { if test_selected ${CATEGORY}; then + local skip=0 + # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. - if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then - echo 3 > /proc/sys/vm/drop_caches - sleep 2 - echo 1 > /proc/sys/vm/compact_memory - sleep 2 + if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + echo 3 > /proc/sys/vm/drop_caches + sleep 2 + echo 1 > /proc/sys/vm/compact_memory + sleep 2 + else + echo "hugepages not supported" | tap_prefix + skip=1 + fi fi local test=$(pretty_name "$*") @@ -224,8 +244,12 @@ run_test() { local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix - ("$@" 2>&1) | tap_prefix - local ret=${PIPESTATUS[0]} + if [ "${skip}" != "1" ]; then + ("$@" 2>&1) | tap_prefix + local ret=${PIPESTATUS[0]} + else + local ret=$ksft_skip + fi count_total=$(( count_total + 1 )) if [ $ret -eq 0 ]; then count_pass=$(( count_pass + 1 )) @@ -261,14 +285,17 @@ CATEGORY="hugetlb" run_test ./map_hugetlb CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise - -nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) -# For this test, we need one and just one huge page -echo 1 > /proc/sys/vm/nr_hugepages -CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv -CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map -# Restore the previous number of huge pages, since further tests rely on it -echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +CATEGORY="hugetlb" run_test ./hugetlb_dio + +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) + # For this test, we need one and just one huge page + echo 1 > /proc/sys/vm/nr_hugepages + CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv + CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map + # Restore the previous number of huge pages, since further tests rely on it + echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +fi if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix @@ -294,18 +321,43 @@ CATEGORY="userfaultfd" run_test ./uffd-unit-tests uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 # Hugetlb tests require source and destination huge pages. Pass in half -# the size ($half_ufd_size_MB), which is used for *each*. +# the size of the free pages we have, which is used for *each*. +# uffd-stress expects a region expressed in MiB, so we adjust +# half_ufd_size_MB accordingly. +half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16 +# uffd-wp-mremap requires at least one page of each size. +have_all_size_hugepgs=true +declare -A nr_size_hugepgs +for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do + old=$(cat $f) + nr_size_hugepgs["$f"]="$old" + if [ "$old" == 0 ]; then + echo 1 > "$f" + fi + if [ $(cat "$f") == 0 ]; then + have_all_size_hugepgs=false + break + fi +done +if $have_all_size_hugepgs; then + CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +else + echo "# SKIP ./uffd-wp-mremap" +fi #cleanup +for f in "${!nr_size_hugepgs[@]}"; do + echo "${nr_size_hugepgs["$f"]}" > "$f" +done echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test -if command -v sudo &> /dev/null; +if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null; then CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit else @@ -326,6 +378,12 @@ CATEGORY="hugetlb" run_test ./thuge-gen CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2 CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2 if $RUN_DESTRUCTIVE; then +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) +enable_soft_offline=$(cat /proc/sys/vm/enable_soft_offline) +echo 8 > /proc/sys/vm/nr_hugepages +CATEGORY="hugetlb" run_test ./hugetlb-soft-offline +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +echo "$enable_soft_offline" > /proc/sys/vm/enable_soft_offline CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison fi @@ -335,10 +393,12 @@ if [ $VADDR64 -ne 0 ]; then # allows high virtual address allocation requests independent # of platform's physical memory. - prev_policy=$(cat /proc/sys/vm/overcommit_memory) - echo 1 > /proc/sys/vm/overcommit_memory - CATEGORY="hugevm" run_test ./virtual_address_range - echo $prev_policy > /proc/sys/vm/overcommit_memory + if [ -x ./virtual_address_range ]; then + prev_policy=$(cat /proc/sys/vm/overcommit_memory) + echo 1 > /proc/sys/vm/overcommit_memory + CATEGORY="hugevm" run_test ./virtual_address_range + echo $prev_policy > /proc/sys/vm/overcommit_memory + fi # va high address boundary switch test ARCH_ARM64="arm64" @@ -359,14 +419,24 @@ CATEGORY="mremap" run_test ./mremap_dontunmap CATEGORY="hmm" run_test bash ./test_hmm.sh smoke +# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests +CATEGORY="madv_guard" run_test ./guard-regions + # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate -(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +CATEGORY="vma_merge" run_test ./merge + +if [ -x ./memfd_secret ] +then +(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 -CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +fi # KSM KSM_MERGE_TIME test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages @@ -385,6 +455,7 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests # protection_keys tests +nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) if [ -x ./protection_keys_32 ] then CATEGORY="pkey" run_test ./protection_keys_32 @@ -394,6 +465,7 @@ if [ -x ./protection_keys_64 ] then CATEGORY="pkey" run_test ./protection_keys_64 fi +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages if [ -x ./soft-dirty ] then @@ -402,6 +474,8 @@ fi CATEGORY="pagemap" run_test ./pagemap_ioctl +CATEGORY="pfnmap" run_test ./pfnmap + # COW tests CATEGORY="cow" run_test ./cow @@ -413,15 +487,17 @@ CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then - if test_selected "thp"; then - if grep xfs /proc/filesystems &>/dev/null; then - XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) - SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) - truncate -s 314572800 ${XFS_IMG} - mkfs.xfs -q ${XFS_IMG} - mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} - MOUNTED_XFS=1 - fi + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + if test_selected "thp"; then + if grep xfs /proc/filesystems &>/dev/null; then + XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) + SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) + truncate -s 314572800 ${XFS_IMG} + mkfs.xfs -q ${XFS_IMG} + mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} + MOUNTED_XFS=1 + fi + fi fi fi @@ -439,6 +515,12 @@ CATEGORY="mkdirty" run_test ./mkdirty CATEGORY="mdwe" run_test ./mdwe_test +CATEGORY="page_frag" run_test ./test_page_frag.sh smoke + +CATEGORY="page_frag" run_test ./test_page_frag.sh aligned + +CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix echo "1..${count_total}" | tap_output diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 7dbfa53d93a0..8e1462ce0532 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -128,7 +128,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon) { const char *type[] = {"file", "anon"}; const char *fname = "./soft-dirty-test-file"; - int test_fd; + int test_fd = 0; char *map; if (anon) { @@ -209,5 +209,5 @@ int main(int argc, char **argv) close(pagemap_fd); - return ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 6c988bd2f335..aa7400ed0e99 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -5,6 +5,7 @@ */ #define _GNU_SOURCE +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -14,6 +15,7 @@ #include <fcntl.h> #include <sys/mman.h> #include <sys/mount.h> +#include <sys/param.h> #include <malloc.h> #include <stdbool.h> #include <time.h> @@ -84,7 +86,67 @@ static void write_debugfs(const char *fmt, ...) write_file(SPLIT_DEBUGFS, input, ret + 1); } -void split_pmd_thp(void) +static char *allocate_zero_filled_hugepage(size_t len) +{ + char *result; + size_t i; + + result = memalign(pmd_pagesize, len); + if (!result) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(result, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + result[i] = (char)0; + + return result; +} + +static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) +{ + unsigned long rss_anon_before, rss_anon_after; + size_t i; + + if (!check_huge_anon(one_page, 4, pmd_pagesize)) + ksft_exit_fail_msg("No THP is allocated\n"); + + rss_anon_before = rss_anon(); + if (!rss_anon_before) + ksft_exit_fail_msg("No RssAnon is allocated before split\n"); + + /* split all THPs */ + write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, + (uint64_t)one_page + len, 0); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)0) + ksft_exit_fail_msg("%ld byte corrupted\n", i); + + if (!check_huge_anon(one_page, 0, pmd_pagesize)) + ksft_exit_fail_msg("Still AnonHugePages not split\n"); + + rss_anon_after = rss_anon(); + if (rss_anon_after >= rss_anon_before) + ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", + rss_anon_before, rss_anon_after); +} + +void split_pmd_zero_pages(void) +{ + char *one_page; + int nr_hpages = 4; + size_t len = nr_hpages * pmd_pagesize; + + one_page = allocate_zero_filled_hugepage(len); + verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); + ksft_test_result_pass("Split zero filled huge pages successful\n"); + free(one_page); +} + +void split_pmd_thp_to_order(int order) { char *one_page; size_t len = 4 * pmd_pagesize; @@ -104,7 +166,7 @@ void split_pmd_thp(void) /* split all THPs */ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, - (uint64_t)one_page + len, 0); + (uint64_t)one_page + len, order); for (i = 0; i < len; i++) if (one_page[i] != (char)i) @@ -114,7 +176,7 @@ void split_pmd_thp(void) if (!check_huge_anon(one_page, 0, pmd_pagesize)) ksft_exit_fail_msg("Still AnonHugePages not split\n"); - ksft_test_result_pass("Split huge pages successful\n"); + ksft_test_result_pass("Split huge pages to order %d successful\n", order); free(one_page); } @@ -201,18 +263,32 @@ void split_pte_mapped_thp(void) close(kpageflags_fd); } -void split_file_backed_thp(void) +void split_file_backed_thp(int order) { int status; int fd; - ssize_t num_written; char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; const char *tmpfs_loc = mkdtemp(tmpfs_template); char testfile[INPUT_MAX]; + ssize_t num_written, num_read; + char *file_buf1, *file_buf2; uint64_t pgoff_start = 0, pgoff_end = 1024; + int i; ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); + file_buf1 = (char *)malloc(pmd_pagesize); + file_buf2 = (char *)malloc(pmd_pagesize); + + if (!file_buf1 || !file_buf2) { + ksft_print_msg("cannot allocate file buffers\n"); + goto out; + } + + for (i = 0; i < pmd_pagesize; i++) + file_buf1[i] = (char)i; + memset(file_buf2, 0, pmd_pagesize); + status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); if (status) @@ -221,26 +297,45 @@ void split_file_backed_thp(void) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + goto cleanup; } - fd = open(testfile, O_CREAT|O_WRONLY, 0664); + fd = open(testfile, O_CREAT|O_RDWR, 0664); if (fd == -1) { ksft_perror("Cannot open testing file"); goto cleanup; } - /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); - close(fd); + /* write pmd size data to the file, so a file-backed THP can be allocated */ + num_written = write(fd, file_buf1, pmd_pagesize); - if (num_written < 1) { - ksft_perror("Fail to write data to testing file"); - goto cleanup; + if (num_written == -1 || num_written != pmd_pagesize) { + ksft_perror("Failed to write data to testing file"); + goto close_file; } /* split the file-backed THP */ - write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); + write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); + /* check file content after split */ + status = lseek(fd, 0, SEEK_SET); + if (status == -1) { + ksft_perror("Cannot lseek file"); + goto close_file; + } + + num_read = read(fd, file_buf2, num_written); + if (num_read == -1 || num_read != num_written) { + ksft_perror("Cannot read file content back"); + goto close_file; + } + + if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) { + ksft_print_msg("File content changed\n"); + goto close_file; + } + + close(fd); status = unlink(testfile); if (status) { ksft_perror("Cannot remove testing file"); @@ -258,12 +353,15 @@ void split_file_backed_thp(void) ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); ksft_print_msg("Please check dmesg for more information\n"); - ksft_test_result_pass("File-backed THP split test done\n"); + ksft_test_result_pass("File-backed THP split to order %d test done\n", order); return; +close_file: + close(fd); cleanup: umount(tmpfs_loc); rmdir(tmpfs_loc); +out: ksft_exit_fail_msg("Error occurred\n"); } @@ -300,7 +398,8 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int dummy; + int dummy = 0; + unsigned char buf[1024]; srand(time(NULL)); @@ -308,11 +407,12 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, if (*fd == -1) ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); - for (i = 0; i < fd_size; i++) { - unsigned char byte = (unsigned char)i; + assert(fd_size % sizeof(buf) == 0); + for (i = 0; i < sizeof(buf); i++) + buf[i] = (unsigned char)i; + for (i = 0; i < fd_size; i += sizeof(buf)) + write(*fd, buf, sizeof(buf)); - write(*fd, &byte, sizeof(byte)); - } close(*fd); sync(); *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); @@ -341,6 +441,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, for (size_t i = 0; i < fd_size; i++) dummy += *(*addr + i); + asm volatile("" : "+r" (dummy)); if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); @@ -359,7 +460,8 @@ err_out_unlink: return -1; } -void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) +void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, + int order, int offset) { int fd; char *addr; @@ -377,7 +479,12 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_l return; err = 0; - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); + if (offset == -1) + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order); + else + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -396,9 +503,15 @@ out: munmap(addr, fd_size); close(fd); unlink(testfile); - if (err) - ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); - ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + if (offset == -1) { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + } else { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset); + } } int main(int argc, char **argv) @@ -409,6 +522,7 @@ int main(int argc, char **argv) char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; const char *fs_loc; bool created_tmp; + int offset; ksft_print_header(); @@ -420,7 +534,7 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(3+9); + ksft_set_plan(1+8+1+9+9+8*4+2); pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; @@ -430,14 +544,26 @@ int main(int argc, char **argv) fd_size = 2 * pmd_pagesize; - split_pmd_thp(); + split_pmd_zero_pages(); + + for (i = 0; i < 9; i++) + if (i != 1) + split_pmd_thp_to_order(i); + split_pte_mapped_thp(); - split_file_backed_thp(); + for (i = 0; i < 9; i++) + split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); for (i = 8; i >= 0; i--) - split_thp_in_pagecache_to_order(fd_size, i, fs_loc); + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); + + for (i = 0; i < 9; i++) + for (offset = 0; + offset < pmd_pagesize / pagesize; + offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); ksft_finished(); diff --git a/tools/testing/selftests/mm/test_page_frag.sh b/tools/testing/selftests/mm/test_page_frag.sh new file mode 100755 index 000000000000..f55b105084cf --- /dev/null +++ b/tools/testing/selftests/mm/test_page_frag.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com> +# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> +# +# This is a test script for the kernel test driver to test the +# correctness and performance of page_frag's implementation. +# Therefore it is just a kernel module loader. You can specify +# and pass different parameters in order to: +# a) analyse performance of page fragment allocations; +# b) stressing and stability check of page_frag subsystem. + +DRIVER="./page_frag/page_frag_test.ko" +CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) +TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}') + +if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then + TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}') + NR_TEST=100000000 +else + TEST_CPU_1=$TEST_CPU_0 + NR_TEST=1000000 +fi + +# 1 if fails +exitcode=1 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +check_test_failed_prefix() { + if dmesg | grep -q 'page_frag_test failed:';then + echo "page_frag_test failed, please check dmesg" + exit $exitcode + fi +} + +# +# Static templates for testing of page_frag APIs. +# Also it is possible to pass any supported parameters manually. +# +SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1" +NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST" +ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1" + +check_test_requirements() +{ + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "$0: Must be run as root" + exit $ksft_skip + fi + + if ! which insmod > /dev/null 2>&1; then + echo "$0: You need insmod installed" + exit $ksft_skip + fi + + if [ ! -f $DRIVER ]; then + echo "$0: You need to compile page_frag_test module" + exit $ksft_skip + fi +} + +run_nonaligned_check() +{ + echo "Run performance tests to evaluate how fast nonaligned alloc API is." + + insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1 +} + +run_aligned_check() +{ + echo "Run performance tests to evaluate how fast aligned alloc API is." + + insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1 +} + +run_smoke_check() +{ + echo "Run smoke test." + + insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1 +} + +usage() +{ + echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | | [ smoke ] | " + echo "manual parameters" + echo + echo "Valid tests and parameters:" + echo + modinfo $DRIVER + echo + echo "Example usage:" + echo + echo "# Shows help message" + echo "$0" + echo + echo "# Smoke testing" + echo "$0 smoke" + echo + echo "# Performance testing for nonaligned alloc API" + echo "$0 nonaligned" + echo + echo "# Performance testing for aligned alloc API" + echo "$0 aligned" + echo + exit 0 +} + +function validate_passed_args() +{ + VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'` + + # + # Something has been passed, check it. + # + for passed_arg in $@; do + key=${passed_arg//=*/} + valid=0 + + for valid_arg in $VALID_ARGS; do + if [[ $key = $valid_arg ]]; then + valid=1 + break + fi + done + + if [[ $valid -ne 1 ]]; then + echo "Error: key is not correct: ${key}" + exit $exitcode + fi + done +} + +function run_manual_check() +{ + # + # Validate passed parameters. If there is wrong one, + # the script exists and does not execute further. + # + validate_passed_args $@ + + echo "Run the test with following parameters: $@" + insmod $DRIVER $@ > /dev/null 2>&1 +} + +function run_test() +{ + if [ $# -eq 0 ]; then + usage + else + if [[ "$1" = "smoke" ]]; then + run_smoke_check + elif [[ "$1" = "nonaligned" ]]; then + run_nonaligned_check + elif [[ "$1" = "aligned" ]]; then + run_aligned_check + else + run_manual_check $@ + fi + fi + + check_test_failed_prefix + + echo "Done." + echo "Check the kernel ring buffer to see the summary." +} + +check_test_requirements +run_test $@ + +exit 0 diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index a4163438108e..ad872af1c81a 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -33,10 +33,11 @@ static const char * const thp_defrag_strings[] = { }; static const char * const shmem_enabled_strings[] = { + "never", "always", "within_size", "advise", - "never", + "inherit", "deny", "force", NULL @@ -86,7 +87,7 @@ int write_file(const char *path, const char *buf, size_t buflen) return (unsigned int) numwritten; } -const unsigned long read_num(const char *path) +unsigned long read_num(const char *path) { char buf[21]; @@ -171,7 +172,7 @@ void thp_write_string(const char *name, const char *val) } } -const unsigned long thp_read_num(const char *name) +unsigned long thp_read_num(const char *name) { char path[PATH_MAX]; int ret; @@ -200,6 +201,7 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int i; @@ -234,12 +236,24 @@ void thp_read_settings(struct thp_settings *settings) settings->hugepages[i].enabled = thp_read_string(path, thp_enabled_strings); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) { + settings->shmem_hugepages[i].enabled = SHMEM_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + settings->shmem_hugepages[i].enabled = + thp_read_string(path, shmem_enabled_strings); + } } void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int enabled; int i; @@ -271,6 +285,15 @@ void thp_write_settings(struct thp_settings *settings) enabled = settings->hugepages[i].enabled; thp_write_string(path, thp_enabled_strings[enabled]); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + enabled = settings->shmem_hugepages[i].enabled; + thp_write_string(path, shmem_enabled_strings[enabled]); + } } struct thp_settings *thp_current_settings(void) @@ -324,17 +347,18 @@ void thp_set_read_ahead_path(char *path) dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; } -unsigned long thp_supported_orders(void) +static unsigned long __thp_supported_orders(bool is_shmem) { unsigned long orders = 0; char path[PATH_MAX]; char buf[256]; - int ret; - int i; + int ret, i; + char anon_dir[] = "enabled"; + char shmem_dir[] = "shmem_enabled"; for (i = 0; i < NR_ORDERS; i++) { - ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", - (getpagesize() >> 10) << i); + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/%s", + (getpagesize() >> 10) << i, is_shmem ? shmem_dir : anon_dir); if (ret >= PATH_MAX) { printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); @@ -347,3 +371,13 @@ unsigned long thp_supported_orders(void) return orders; } + +unsigned long thp_supported_orders(void) +{ + return __thp_supported_orders(false); +} + +unsigned long thp_shmem_supported_orders(void) +{ + return __thp_supported_orders(true); +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 71cbff05f4c7..fc131d23d593 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -22,10 +22,11 @@ enum thp_defrag { }; enum shmem_enabled { + SHMEM_NEVER, SHMEM_ALWAYS, SHMEM_WITHIN_SIZE, SHMEM_ADVISE, - SHMEM_NEVER, + SHMEM_INHERIT, SHMEM_DENY, SHMEM_FORCE, }; @@ -46,6 +47,10 @@ struct khugepaged_settings { unsigned long pages_to_scan; }; +struct shmem_hugepages_settings { + enum shmem_enabled enabled; +}; + struct thp_settings { enum thp_enabled thp_enabled; enum thp_defrag thp_defrag; @@ -54,16 +59,17 @@ struct thp_settings { struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; struct hugepages_settings hugepages[NR_ORDERS]; + struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS]; }; int read_file(const char *path, char *buf, size_t buflen); int write_file(const char *path, const char *buf, size_t buflen); -const unsigned long read_num(const char *path); +unsigned long read_num(const char *path); void write_num(const char *path, unsigned long num); int thp_read_string(const char *name, const char * const strings[]); void thp_write_string(const char *name, const char *val); -const unsigned long thp_read_num(const char *name); +unsigned long thp_read_num(const char *name); void thp_write_num(const char *name, unsigned long num); void thp_write_settings(struct thp_settings *settings); @@ -76,5 +82,6 @@ void thp_save_settings(void); void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); +unsigned long thp_shmem_supported_orders(void); #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index ea7fd8fe2876..95b6f043a3cb 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -13,8 +13,9 @@ sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m (warning this will remove all if someone else uses them) */ -#define _GNU_SOURCE 1 +#define _GNU_SOURCE #include <sys/mman.h> +#include <linux/mman.h> #include <stdlib.h> #include <stdio.h> #include <sys/ipc.h> @@ -28,19 +29,23 @@ #include "vm_util.h" #include "../kselftest.h" -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f #if !defined(MAP_HUGETLB) #define MAP_HUGETLB 0x40000 #endif #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#ifndef SHM_HUGE_SHIFT #define SHM_HUGE_SHIFT 26 +#endif +#ifndef SHM_HUGE_MASK #define SHM_HUGE_MASK 0x3f +#endif +#ifndef SHM_HUGE_2MB #define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#endif +#ifndef SHM_HUGE_1GB #define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) +#endif #define NUM_PAGESIZES 5 #define NUM_PAGES 4 @@ -72,7 +77,7 @@ void show(unsigned long ps) system(buf); } -unsigned long read_sysfs(int warn, char *fmt, ...) +unsigned long thuge_read_sysfs(int warn, char *fmt, ...) { char *line = NULL; size_t linelen = 0; @@ -101,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...) unsigned long read_free(unsigned long ps) { - return read_sysfs(ps != getpagesize(), + return thuge_read_sysfs(ps != getpagesize(), "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", ps >> 10); } @@ -122,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s mmap\n", __func__); + "%s mmap %lu %x\n", __func__, size, flags); if (munmap(map, size * NUM_PAGES)) ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno)); @@ -160,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s: mmap\n", __func__); + "%s: mmap %lu %x\n", __func__, size, flags); if (shmdt(map)) ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno)); } @@ -190,7 +195,7 @@ void find_pagesizes(void) } globfree(&g); - if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 7ad6ba660c7d..a37088a23ffe 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -10,7 +10,7 @@ #define BASE_PMD_ADDR ((void *)(1UL << 30)) volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; int uffd = -1, uffd_flags, finished, *pipefd, test_type; bool map_shared; @@ -269,7 +269,7 @@ void uffd_test_ctx_clear(void) size_t i; if (pipefd) { - for (i = 0; i < nr_cpus * 2; ++i) { + for (i = 0; i < nr_parallel * 2; ++i) { if (close(pipefd[i])) err("close pipefd"); } @@ -323,7 +323,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) ret = userfaultfd_open(&features); if (ret) { if (errmsg) - *errmsg = "possible lack of priviledge"; + *errmsg = "possible lack of privilege"; return ret; } @@ -348,7 +348,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) /* * After initialization of area_src, we must explicitly release pages * for area_dst to make sure it's fully empty. Otherwise we could have - * some area_dst pages be errornously initialized with zero pages, + * some area_dst pages be erroneously initialized with zero pages, * hence we could hit memory corruption later in the test. * * One example is when THP is globally enabled, above allocate_area() @@ -365,10 +365,10 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) */ uffd_test_ops->release_pages(area_dst); - pipefd = malloc(sizeof(int) * nr_cpus * 2); + pipefd = malloc(sizeof(int) * nr_parallel * 2); if (!pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index cc5629c3d2aa..7700cbfa3975 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -8,6 +8,7 @@ #define __UFFD_COMMON_H__ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ // Use ll64 #include <stdio.h> #include <errno.h> #include <unistd.h> @@ -97,7 +98,7 @@ struct uffd_test_case_ops { }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; extern bool map_shared; diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index f78bab0f3d45..40af7f67c407 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -36,6 +36,7 @@ #include "uffd-common.h" +uint64_t features; #ifdef __NR_userfaultfd #define BOUNCE_RANDOM (1<<0) @@ -179,12 +180,12 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_cpus]; - pthread_t uffd_threads[nr_cpus]; - pthread_t background_threads[nr_cpus]; + pthread_t locking_threads[nr_parallel]; + pthread_t uffd_threads[nr_parallel]; + pthread_t background_threads[nr_parallel]; finished = 0; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, locking_thread, (void *)cpu)) return 1; @@ -202,7 +203,7 @@ static int stress(struct uffd_args *args) background_thread, (void *)cpu)) return 1; } - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -218,11 +219,11 @@ static int stress(struct uffd_args *args) uffd_test_ops->release_pages(area_src); finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { if (write(pipefd[cpu*2+1], &c, 1) != 1) @@ -245,12 +246,16 @@ static int userfaultfd_stress(void) { void *area; unsigned long nr; - struct uffd_args args[nr_cpus]; + struct uffd_args args[nr_parallel]; uint64_t mem_size = nr_pages * page_size; + int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + memset(args, 0, sizeof(struct uffd_args) * nr_parallel); - if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL)) + if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + flags = UFFD_FEATURE_WP_UNPOPULATED; + + if (uffd_test_ctx_init(flags, NULL)) err("context init failed"); if (posix_memalign(&area, page_size, page_size)) @@ -320,7 +325,7 @@ static int userfaultfd_stress(void) */ uffd_test_ops->release_pages(area_dst); - uffd_stats_reset(args, nr_cpus); + uffd_stats_reset(args, nr_parallel); /* bounce pass */ if (stress(args)) { @@ -354,7 +359,7 @@ static int userfaultfd_stress(void) swap(area_src_alias, area_dst_alias); - uffd_stats_report(args, nr_cpus); + uffd_stats_report(args, nr_parallel); } uffd_test_ctx_clear(); @@ -385,8 +390,6 @@ static void set_test_type(const char *type) static void parse_test_type_arg(const char *raw_type) { - uint64_t features = UFFD_API_FEATURES; - set_test_type(raw_type); if (!test_type) @@ -409,12 +412,15 @@ static void parse_test_type_arg(const char *raw_type) * feature. */ - if (userfaultfd_open(&features)) - err("Userfaultfd open failed"); + if (uffd_get_features(&features) && errno == ENOENT) + ksft_exit_skip("failed to get available features (%d)\n", errno); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); + if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + test_uffdio_wp = false; + close(uffd); uffd = -1; } @@ -429,6 +435,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + unsigned long nr_cpus; size_t bytes; if (argc < 4) @@ -448,10 +455,19 @@ int main(int argc, char **argv) } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus > 32) { + /* Don't let calculation below go to zero. */ + ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", + nr_cpus); + nr_parallel = 32; + } else { + nr_parallel = nr_cpus; + } - nr_pages_per_cpu = bytes / page_size / nr_cpus; + nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { - _err("invalid MiB"); + _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", + bytes, page_size, nr_parallel); usage(); } @@ -460,7 +476,7 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_cpus; + nr_pages = nr_pages_per_cpu * nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 21ec23206ab4..c73fd5d455c8 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -26,6 +26,8 @@ #define ALIGN_UP(x, align_to) \ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + struct mem_type { const char *name; unsigned int mem_flag; @@ -196,9 +198,10 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, else page_size = psize(); - nr_pages = UFFD_TEST_MEM_SIZE / page_size; + /* Ensure we have at least 2 pages */ + nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; /* TODO: remove this global var.. it's so ugly */ - nr_cpus = 1; + nr_parallel = 1; /* Initialize test arguments */ args->mem_type = mem_type; @@ -242,6 +245,8 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; + ready_for_fork = true; + /* Read until a full msg received */ while (uffd_read_msg(args->parent_uffd, &msg)); @@ -309,8 +314,11 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { + ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ } child = fork(); @@ -1118,7 +1126,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, char c; unsigned long long count; struct uffd_args args = { 0 }; - char *orig_area_src, *orig_area_dst; + char *orig_area_src = NULL, *orig_area_dst = NULL; unsigned long step_size, step_count; unsigned long src_offs = 0; unsigned long dst_offs = 0; @@ -1186,7 +1194,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, nr, count, count_verify[src_offs + nr + i]); } } - if (step_size > page_size) { + if (chunk_size > page_size) { area_src = orig_area_src; area_dst = orig_area_dst; } @@ -1223,6 +1231,182 @@ static void uffd_move_pmd_split_test(uffd_test_args_t *targs) uffd_move_pmd_handle_fault); } +static bool +uffdio_verify_results(const char *name, int ret, int error, long result) +{ + /* + * Should always return -1 with errno=EAGAIN, with corresponding + * result field updated in ioctl() args to be -EAGAIN too + * (e.g. copy.copy field for UFFDIO_COPY). + */ + if (ret != -1) { + uffd_test_fail("%s should have returned -1", name); + return false; + } + + if (error != EAGAIN) { + uffd_test_fail("%s should have errno==EAGAIN", name); + return false; + } + + if (result != -EAGAIN) { + uffd_test_fail("%s should have been updated for -EAGAIN", + name); + return false; + } + + return true; +} + +/* + * This defines a function to test one ioctl. Note that here "field" can + * be 1 or anything not -EAGAIN. With that initial value set, we can + * verify later that it should be updated by kernel (when -EAGAIN + * returned), by checking whether it is also updated to -EAGAIN. + */ +#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \ + static bool uffdio_mmap_changing_test_##name(int fd) \ + { \ + int ret; \ + struct uffdio_##name args = { \ + .field = 1, \ + }; \ + ret = ioctl(fd, ioctl_name, &args); \ + return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \ + } + +DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage) +DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy) +DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move) +DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated) +DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped) + +typedef enum { + /* We actually do not care about any state except UNINTERRUPTIBLE.. */ + THR_STATE_UNKNOWN = 0, + THR_STATE_UNINTERRUPTIBLE, +} thread_state; + +static void sleep_short(void) +{ + usleep(1000); +} + +static thread_state thread_state_get(pid_t tid) +{ + const char *header = "State:\t"; + char tmp[256], *p, c; + FILE *fp; + + snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid); + fp = fopen(tmp, "r"); + + if (!fp) + return THR_STATE_UNKNOWN; + + while (fgets(tmp, sizeof(tmp), fp)) { + p = strstr(tmp, header); + if (p) { + /* For example, "State:\tD (disk sleep)" */ + c = *(p + sizeof(header) - 1); + return c == 'D' ? + THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN; + } + } + + return THR_STATE_UNKNOWN; +} + +static void thread_state_until(pid_t tid, thread_state state) +{ + thread_state s; + + do { + s = thread_state_get(tid); + sleep_short(); + } while (s != state); +} + +static void *uffd_mmap_changing_thread(void *opaque) +{ + volatile pid_t *pid = opaque; + int ret; + + /* Unfortunately, it's only fetch-able from the thread itself.. */ + assert(*pid == 0); + *pid = syscall(SYS_gettid); + + /* Inject an event, this will hang solid until the event read */ + ret = madvise(area_dst, page_size, MADV_REMOVE); + if (ret) + err("madvise(MADV_REMOVE) failed"); + + return NULL; +} + +static void uffd_consume_message(int fd) +{ + struct uffd_msg msg = { 0 }; + + while (uffd_read_msg(fd, &msg)); +} + +static void uffd_mmap_changing_test(uffd_test_args_t *targs) +{ + /* + * This stores the real PID (which can be different from how tid is + * defined..) for the child thread, 0 means not initialized. + */ + pid_t pid = 0; + pthread_t tid; + int ret; + + if (uffd_register(uffd, area_dst, nr_pages * page_size, + true, false, false)) + err("uffd_register() failed"); + + /* Create a thread to generate the racy event */ + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + if (ret) + err("pthread_create() failed"); + + /* + * Wait until the thread setup the pid. Use volatile to make sure + * it reads from RAM not regs. + */ + while (!(volatile pid_t)pid) + sleep_short(); + + /* Wait until the thread hangs at REMOVE event */ + thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); + + if (!uffdio_mmap_changing_test_copy(uffd)) + return; + + if (!uffdio_mmap_changing_test_zeropage(uffd)) + return; + + if (!uffdio_mmap_changing_test_move(uffd)) + return; + + if (!uffdio_mmap_changing_test_poison(uffd)) + return; + + if (!uffdio_mmap_changing_test_continue(uffd)) + return; + + /* + * All succeeded above! Recycle everything. Start by reading the + * event so as to kick the thread roll again.. + */ + uffd_consume_message(uffd); + + ret = pthread_join(tid, NULL); + assert(ret == 0); + + uffd_test_pass(); +} + static int prevent_hugepages(const char **errmsg) { /* This should be done before source area is populated */ @@ -1462,6 +1646,32 @@ uffd_test_case_t uffd_tests[] = { .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_POISON, }, + { + .name = "mmap-changing", + .uffd_fn = uffd_mmap_changing_test, + /* + * There's no point running this test over all mem types as + * they share the same code paths. + * + * Choose shmem for simplicity, because (1) shmem supports + * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is + * almost always available (unlike hugetlb). Here we + * abused SHMEM for UFFDIO_MOVE, but the test we want to + * cover doesn't yet need the correct memory type.. + */ + .mem_targets = MEM_SHMEM, + /* + * Any UFFD_FEATURE_EVENT_* should work to trigger the + * race logically, but choose the simplest (REMOVE). + * + * Meanwhile, since we'll cover quite a few new ioctl()s + * (CONTINUE, POISON, MOVE), skip this test for old kernels + * by choosing all of them. + */ + .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE | + UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON | + UFFD_FEATURE_MINOR_SHMEM, + }, }; static void usage(const char *prog) diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c new file mode 100644 index 000000000000..c2ba7d46c7b4 --- /dev/null +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define _GNU_SOURCE +#include <stdbool.h> +#include <stdint.h> +#include <fcntl.h> +#include <assert.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include "../kselftest.h" +#include "thp_settings.h" +#include "uffd-common.h" + +static int pagemap_fd; +static size_t pagesize; +static int nr_pagesizes = 1; +static int nr_thpsizes; +static size_t thpsizes[20]; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; + +static int sz2ord(size_t size) +{ + return __builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!read_pmd_pagesize()) + return 0; + + orders = thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + +static void *mmap_aligned(size_t size, int prot, int flags) +{ + size_t mmap_size = size * 2; + char *mmap_mem, *mem; + + mmap_mem = mmap(NULL, mmap_size, prot, flags, -1, 0); + if (mmap_mem == MAP_FAILED) + return mmap_mem; + + mem = (char *)(((uintptr_t)mmap_mem + size - 1) & ~(size - 1)); + munmap(mmap_mem, mem - mmap_mem); + munmap(mem + size, mmap_mem + mmap_size - mem - size); + + return mem; +} + +static void *alloc_one_folio(size_t size, bool private, bool hugetlb) +{ + bool thp = !hugetlb && size > pagesize; + int flags = MAP_ANONYMOUS; + int prot = PROT_READ | PROT_WRITE; + char *mem, *addr; + + assert((size & (size - 1)) == 0); + + if (private) + flags |= MAP_PRIVATE; + else + flags |= MAP_SHARED; + + /* + * For THP, we must explicitly enable the THP size, allocate twice the + * required space then manually align. + */ + if (thp) { + struct thp_settings settings = *thp_current_settings(); + + if (private) + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + else + settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS; + + thp_push_settings(&settings); + + mem = mmap_aligned(size, prot, flags); + } else { + if (hugetlb) { + flags |= MAP_HUGETLB; + flags |= __builtin_ctzll(size) << MAP_HUGE_SHIFT; + } + + mem = mmap(NULL, size, prot, flags, -1, 0); + } + + if (mem == MAP_FAILED) { + mem = NULL; + goto out; + } + + assert(((uintptr_t)mem & (size - 1)) == 0); + + /* + * Populate the folio by writing the first byte and check that all pages + * are populated. Finally set the whole thing to non-zero data to avoid + * kernel from mapping it back to the zero page. + */ + mem[0] = 1; + for (addr = mem; addr < mem + size; addr += pagesize) { + if (!pagemap_is_populated(pagemap_fd, addr)) { + munmap(mem, size); + mem = NULL; + goto out; + } + } + memset(mem, 1, size); +out: + if (thp) + thp_pop_settings(); + + return mem; +} + +static bool check_uffd_wp_state(void *mem, size_t size, bool expect) +{ + uint64_t pte; + void *addr; + + for (addr = mem; addr < mem + size; addr += pagesize) { + pte = pagemap_get_entry(pagemap_fd, addr); + if (!!(pte & PM_UFFD_WP) != expect) { + ksft_test_result_fail("uffd-wp not %s for pte %lu!\n", + expect ? "set" : "clear", + (addr - mem) / pagesize); + return false; + } + } + + return true; +} + +static bool range_is_swapped(void *addr, size_t size) +{ + for (; size; addr += pagesize, size -= pagesize) + if (!pagemap_is_swapped(pagemap_fd, addr)) + return false; + return true; +} + +static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +{ + struct uffdio_writeprotect wp_prms; + uint64_t features = 0; + void *addr = NULL; + void *mem = NULL; + + assert(!(hugetlb && swapout)); + + ksft_print_msg("[RUN] %s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n", + __func__, + size, + private ? "true" : "false", + swapout ? "true" : "false", + hugetlb ? "true" : "false"); + + /* Allocate a folio of required size and type. */ + mem = alloc_one_folio(size, private, hugetlb); + if (!mem) { + ksft_test_result_fail("alloc_one_folio() failed\n"); + goto out; + } + + /* Register range for uffd-wp. */ + if (userfaultfd_open(&features)) { + if (errno == ENOENT) + ksft_test_result_skip("userfaultfd not available\n"); + else + ksft_test_result_fail("userfaultfd_open() failed\n"); + goto out; + } + if (uffd_register(uffd, mem, size, false, true, false)) { + ksft_test_result_fail("uffd_register() failed\n"); + goto out; + } + wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; + wp_prms.range.start = (uintptr_t)mem; + wp_prms.range.len = size; + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); + goto out; + } + + if (swapout) { + madvise(mem, size, MADV_PAGEOUT); + if (!range_is_swapped(mem, size)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto out; + } + } + + /* Check that uffd-wp is set for all PTEs in range. */ + if (!check_uffd_wp_state(mem, size, true)) + goto out; + + /* + * Move the mapping to a new, aligned location. Since + * UFFD_FEATURE_EVENT_REMAP is not set, we expect the uffd-wp bit for + * each PTE to be cleared in the new mapping. + */ + addr = mmap_aligned(size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS); + if (addr == MAP_FAILED) { + ksft_test_result_fail("mmap_aligned() failed\n"); + goto out; + } + if (mremap(mem, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, addr) == MAP_FAILED) { + ksft_test_result_fail("mremap() failed\n"); + munmap(addr, size); + goto out; + } + mem = addr; + + /* Check that uffd-wp is cleared for all PTEs in range. */ + if (!check_uffd_wp_state(mem, size, false)) + goto out; + + ksft_test_result_pass("%s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n", + __func__, + size, + private ? "true" : "false", + swapout ? "true" : "false", + hugetlb ? "true" : "false"); +out: + if (mem) + munmap(mem, size); + if (uffd >= 0) { + close(uffd); + uffd = -1; + } +} + +struct testcase { + size_t *sizes; + int *nr_sizes; + bool private; + bool swapout; + bool hugetlb; +}; + +static const struct testcase testcases[] = { + /* base pages. */ + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = false, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = true, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = false, + .swapout = true, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = true, + .swapout = true, + .hugetlb = false, + }, + + /* thp. */ + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = false, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = true, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = false, + .swapout = true, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = true, + .swapout = true, + .hugetlb = false, + }, + + /* hugetlb. */ + { + .sizes = hugetlbsizes, + .nr_sizes = &nr_hugetlbsizes, + .private = false, + .swapout = false, + .hugetlb = true, + }, + { + .sizes = hugetlbsizes, + .nr_sizes = &nr_hugetlbsizes, + .private = true, + .swapout = false, + .hugetlb = true, + }, +}; + +int main(int argc, char **argv) +{ + struct thp_settings settings; + int i, j, plan = 0; + + pagesize = getpagesize(); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); + nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, + ARRAY_SIZE(hugetlbsizes)); + + /* If THP is supported, save THP settings and initially disable THP. */ + if (nr_thpsizes) { + thp_save_settings(); + thp_read_settings(&settings); + for (i = 0; i < NR_ORDERS; i++) { + settings.hugepages[i].enabled = THP_NEVER; + settings.shmem_hugepages[i].enabled = SHMEM_NEVER; + } + thp_push_settings(&settings); + } + + for (i = 0; i < ARRAY_SIZE(testcases); i++) + plan += *testcases[i].nr_sizes; + ksft_set_plan(plan); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + const struct testcase *tc = &testcases[i]; + + for (j = 0; j < *tc->nr_sizes; j++) + test_one_folio(tc->sizes[j], tc->private, tc->swapout, + tc->hugetlb); + } + + /* If THP is supported, restore original THP settings. */ + if (nr_thpsizes) + thp_restore_settings(); + + i = ksft_get_fail_cnt(); + if (i) + ksft_exit_fail_msg("%d out of %d tests failed\n", + i, ksft_test_num()); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index cfbc501290d3..896b3f73fc53 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -9,26 +9,9 @@ #include <sys/mman.h> #include <string.h> +#include "vm_util.h" #include "../kselftest.h" -#ifdef __powerpc64__ -#define PAGE_SIZE (64 << 10) -/* - * This will work with 16M and 2M hugepage size - */ -#define HUGETLB_SIZE (16 << 20) -#elif __aarch64__ -/* - * The default hugepage size for 64k base pagesize - * is 512MB. - */ -#define PAGE_SIZE (64 << 10) -#define HUGETLB_SIZE (512 << 20) -#else -#define PAGE_SIZE (4 << 10) -#define HUGETLB_SIZE (2 << 20) -#endif - /* * The hint addr value is used to allocate addresses * beyond the high address switch boundary. @@ -37,18 +20,8 @@ #define ADDR_MARK_128TB (1UL << 47) #define ADDR_MARK_256TB (1UL << 48) -#define HIGH_ADDR_128TB ((void *) (1UL << 48)) -#define HIGH_ADDR_256TB ((void *) (1UL << 49)) - -#define LOW_ADDR ((void *) (1UL << 30)) - -#ifdef __aarch64__ -#define ADDR_SWITCH_HINT ADDR_MARK_256TB -#define HIGH_ADDR HIGH_ADDR_256TB -#else -#define ADDR_SWITCH_HINT ADDR_MARK_128TB -#define HIGH_ADDR HIGH_ADDR_128TB -#endif +#define HIGH_ADDR_128TB (1UL << 48) +#define HIGH_ADDR_256TB (1UL << 49) struct testcase { void *addr; @@ -59,195 +32,230 @@ struct testcase { unsigned int keep_mapped:1; }; -static struct testcase testcases[] = { - { - /* - * If stack is moved, we could possibly allocate - * this at the requested address. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - /* - * Unless MAP_FIXED is specified, allocation based on hint - * addr is never at requested address or above it, which is - * beyond high address switch boundary in this case. Instead, - * a suitable allocation is found in lower address space. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", - .low_addr_required = 1, - }, - { - /* - * Exact mapping at high address switch boundary, should - * be obtained even without MAP_FIXED as area is free. - */ - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, -}; +static struct testcase *testcases; +static struct testcase *hugetlb_testcases; +static int sz_testcases, sz_hugetlb_testcases; +static unsigned long switch_hint; -static struct testcase hugetlb_testcases[] = { - { - .addr = NULL, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", - }, -}; +/* Initialize testcases inside a function to compute parameters at runtime */ +void testcases_init(void) +{ + unsigned long pagesize = getpagesize(); + unsigned long hugepagesize = default_huge_page_size(); + unsigned long low_addr = (1UL << 30); + unsigned long addr_switch_hint = ADDR_MARK_128TB; + unsigned long high_addr = HIGH_ADDR_128TB; + +#ifdef __aarch64__ + + /* Post LPA2, the lower userspace VA on a 16K pagesize is 47 bits. */ + if (pagesize != (16UL << 10)) { + addr_switch_hint = ADDR_MARK_256TB; + high_addr = HIGH_ADDR_256TB; + } +#endif + + struct testcase t[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, pagesize)", + .low_addr_required = 1, + }, + { + /* + * Unless MAP_FIXED is specified, allocation based on hint + * addr is never at requested address or above it, which is + * beyond high address switch boundary in this case. Instead, + * a suitable allocation is found in lower address space. + */ + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, (2 * pagesize))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at high address switch boundary, should + * be obtained even without MAP_FIXED as area is free. + */ + .addr = ((void *)(addr_switch_hint)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint, pagesize)", + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = (void *)low_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(low_addr)", + .low_addr_required = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr)", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr) again", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(high_addr, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, pagesize)", + .low_addr_required = 1, + }, + { + .addr = (void *)(addr_switch_hint - pagesize), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, 2 * pagesize)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint - pagesize / 2), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize/2 , 2 * pagesize)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(addr_switch_hint)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint, pagesize)", + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)", + }, + }; + + struct testcase ht[] = { + { + .addr = NULL, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = (void *)low_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(low_addr, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(high_addr, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(addr_switch_hint - pagesize), + .size = 2 * hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint , 2*hugepagesize, MAP_FIXED | MAP_HUGETLB)", + }, + }; + + testcases = malloc(sizeof(t)); + hugetlb_testcases = malloc(sizeof(ht)); + + /* Copy into global arrays */ + memcpy(testcases, t, sizeof(t)); + memcpy(hugetlb_testcases, ht, sizeof(ht)); + + sz_testcases = ARRAY_SIZE(t); + sz_hugetlb_testcases = ARRAY_SIZE(ht); + switch_hint = addr_switch_hint; +} static int run_test(struct testcase *test, int count) { @@ -267,7 +275,7 @@ static int run_test(struct testcase *test, int count) continue; } - if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + if (t->low_addr_required && p >= (void *)(switch_hint)) { printf("FAILED\n"); ret = KSFT_FAIL; } else { @@ -285,6 +293,20 @@ static int run_test(struct testcase *test, int count) return ret; } +#ifdef __aarch64__ +/* Check if userspace VA > 48 bits */ +static int high_address_present(void) +{ + void *ptr = mmap((void *)(1UL << 50), 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (ptr == MAP_FAILED) + return 0; + + munmap(ptr, 1); + return 1; +} +#endif + static int supported_arch(void) { #if defined(__powerpc64__) @@ -292,7 +314,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return getpagesize() == PAGE_SIZE; + return high_address_present(); #else return 0; #endif @@ -305,8 +327,10 @@ int main(int argc, char **argv) if (!supported_arch()) return KSFT_SKIP; - ret = run_test(testcases, ARRAY_SIZE(testcases)); + testcases_init(); + + ret = run_test(testcases, sz_testcases); if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) - ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + ret = run_test(hugetlb_testcases, sz_hugetlb_testcases); return ret; } diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index a0a75f302904..325de53966b6 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -7,23 +7,20 @@ # real test to check that the kernel is configured to support at least 5 # pagetable levels. -# 1 means the test failed -exitcode=1 - # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -fail() +skip() { echo "$1" - exit $exitcode + exit $ksft_skip } check_supported_x86_64() { local config="/proc/config.gz" [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" - [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" # gzip -dcfq automatically handles both compressed and plaintext input. # See man 1 gzip under '-f'. @@ -33,11 +30,31 @@ check_supported_x86_64() else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) if [[ "${pg_table_levels}" -lt 5 ]]; then - echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" - exit $ksft_skip + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then - echo "$0: CPU does not have the necessary la57 flag to support page table level 5" - exit $ksft_skip + skip "$0: CPU does not have the necessary la57 flag to support page table level 5" + fi +} + +check_supported_ppc64() +{ + local config="/proc/config.gz" + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" + + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + if [[ "${pg_table_levels}" -lt 5 ]]; then + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" + fi + + local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') + if [[ "$mmu_support" != "radix" ]]; then + skip "$0: System does not use Radix MMU, required for 5-level paging" + fi + + local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) + if [[ "${hugepages_total}" -eq 0 ]]; then + skip "$0: HugePages are not enabled, required for some tests" fi } @@ -50,6 +67,9 @@ check_test_requirements() "x86_64") check_supported_x86_64 ;; + "ppc64le"|"ppc64") + check_supported_ppc64 + ;; *) return 0 ;; @@ -57,8 +77,4 @@ check_test_requirements() } check_test_requirements -./va_high_addr_switch - -# In order to run hugetlb testcases, "--run-hugetlb" must be appended -# to the binary. ./va_high_addr_switch --run-hugetlb diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 7bcf8d48256a..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -10,8 +10,12 @@ #include <string.h> #include <unistd.h> #include <errno.h> +#include <sys/prctl.h> #include <sys/mman.h> #include <sys/time.h> +#include <fcntl.h> + +#include "vm_util.h" #include "../kselftest.h" /* @@ -62,7 +66,7 @@ #define NR_CHUNKS_HIGH NR_CHUNKS_384TB #endif -static char *hind_addr(void) +static char *hint_addr(void) { int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); @@ -73,19 +77,40 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); } +static void mark_range(char *ptr, size_t size) +{ + if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) { + if (errno == EINVAL) { + /* Depends on CONFIG_ANON_VMA_NAME */ + ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n"); + ksft_finished(); + } else { + ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n"); + } + } +} + +static int is_marked_vma(const char *vma_name) +{ + return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n"); +} + static int validate_lower_address_hint(void) { char *ptr; ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ | - PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) return 0; @@ -93,6 +118,77 @@ static int validate_lower_address_hint(void) return 1; } +static int validate_complete_va_space(void) +{ + unsigned long start_addr, end_addr, prev_end_addr; + char line[400]; + char prot[6]; + FILE *file; + int fd; + + fd = open("va_dump", O_CREAT | O_WRONLY, 0600); + unlink("va_dump"); + if (fd < 0) { + ksft_test_result_skip("cannot create or open dump file\n"); + ksft_finished(); + } + + file = fopen("/proc/self/maps", "r"); + if (file == NULL) + ksft_exit_fail_msg("cannot open /proc/self/maps\n"); + + prev_end_addr = 0; + while (fgets(line, sizeof(line), file)) { + const char *vma_name = NULL; + int vma_name_start = 0; + unsigned long hop; + + if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n", + &start_addr, &end_addr, prot, &vma_name_start) != 3) + ksft_exit_fail_msg("cannot parse /proc/self/maps\n"); + + if (vma_name_start) + vma_name = line + vma_name_start; + + /* end of userspace mappings; ignore vsyscall mapping */ + if (start_addr & (1UL << 63)) + return 0; + + /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */ + if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE) + return 1; + + prev_end_addr = end_addr; + + if (prot[0] != 'r') + continue; + + if (check_vmflag_io((void *)start_addr)) + continue; + + /* + * Confirm whether MAP_CHUNK_SIZE chunk can be found or not. + * If write succeeds, no need to check MAP_CHUNK_SIZE - 1 + * addresses after that. If the address was not held by this + * process, write would fail with errno set to EFAULT. + * Anyways, if write returns anything apart from 1, exit the + * program since that would mean a bug in /proc/self/maps. + */ + hop = 0; + while (start_addr + hop < end_addr) { + if (write(fd, (void *)(start_addr + hop), 1) != 1) + return 1; + lseek(fd, 0, SEEK_SET); + + if (is_marked_vma(vma_name)) + munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE); + + hop += MAP_CHUNK_SIZE; + } + } + return 0; +} + int main(int argc, char *argv[]) { char *ptr[NR_CHUNKS_LOW]; @@ -104,17 +200,16 @@ int main(int argc, char *argv[]) ksft_set_plan(1); for (i = 0; i < NR_CHUNKS_LOW; i++) { - ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr[i] == MAP_FAILED) { - if (validate_lower_address_hint()) { - ksft_test_result_skip("Memory constraint not fulfilled\n"); - ksft_finished(); - } + if (validate_lower_address_hint()) + ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n"); break; } + mark_range(ptr[i], MAP_CHUNK_SIZE); validate_addr(ptr[i], 0); } lchunks = i; @@ -125,16 +220,21 @@ int main(int argc, char *argv[]) } for (i = 0; i < NR_CHUNKS_HIGH; i++) { - hint = hind_addr(); - hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + hint = hint_addr(); + hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (hptr[i] == MAP_FAILED) break; + mark_range(ptr[i], MAP_CHUNK_SIZE); validate_addr(hptr[i], 1); } hchunks = i; + if (validate_complete_va_space()) { + ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n"); + ksft_finished(); + } for (i = 0; i < lchunks; i++) munmap(ptr[i], MAP_CHUNK_SIZE); diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5a62530da3b5..5492e3f784df 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> +#include <errno.h> #include <fcntl.h> #include <dirent.h> +#include <inttypes.h> #include <sys/ioctl.h> #include <linux/userfaultfd.h> #include <linux/fs.h> @@ -12,6 +14,7 @@ #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" #define SMAP_FILE_PATH "/proc/self/smaps" +#define STATUS_FILE_PATH "/proc/self/status" #define MAX_LINE_LENGTH 500 unsigned int __page_size; @@ -137,7 +140,7 @@ void clear_softdirty(void) ksft_exit_fail_msg("opening clear_refs failed\n"); ret = write(fd, ctrl, strlen(ctrl)); close(fd); - if (ret != strlen(ctrl)) + if (ret != (signed int)strlen(ctrl)) ksft_exit_fail_msg("writing clear_refs failed\n"); } @@ -171,13 +174,32 @@ uint64_t read_pmd_pagesize(void) return strtoul(buf, NULL, 10); } -bool __check_huge(void *addr, char *pattern, int nr_hpages, - uint64_t hpage_size) +unsigned long rss_anon(void) { - uint64_t thp = -1; - int ret; + unsigned long rss_anon = 0; FILE *fp; char buffer[MAX_LINE_LENGTH]; + + fp = fopen(STATUS_FILE_PATH, "r"); + if (!fp) + ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH); + + if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer))) + goto err_out; + + if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1) + ksft_exit_fail_msg("Reading status error\n"); + +err_out: + fclose(fp); + return rss_anon; +} + +char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len) +{ + int ret; + FILE *fp; + char *entry = NULL; char addr_pattern[MAX_LINE_LENGTH]; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", @@ -189,23 +211,40 @@ bool __check_huge(void *addr, char *pattern, int nr_hpages, if (!fp) ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); - if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) + if (!check_for_pattern(fp, addr_pattern, buf, len)) goto err_out; - /* - * Fetch the pattern in the same block and check the number of - * hugepages. - */ - if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) + /* Fetch the pattern in the same block */ + if (!check_for_pattern(fp, pattern, buf, len)) goto err_out; - snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); + /* Trim trailing newline */ + entry = strchr(buf, '\n'); + if (entry) + *entry = '\0'; - if (sscanf(buffer, addr_pattern, &thp) != 1) - ksft_exit_fail_msg("Reading smap error\n"); + entry = buf + strlen(pattern); err_out: fclose(fp); + return entry; +} + +bool __check_huge(void *addr, char *pattern, int nr_hpages, + uint64_t hpage_size) +{ + char buffer[MAX_LINE_LENGTH]; + uint64_t thp = -1; + char *entry; + + entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer)); + if (!entry) + goto err_out; + + if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1) + ksft_exit_fail_msg("Reading smap error\n"); + +err_out: return thp == (nr_hpages * (hpage_size >> 10)); } @@ -362,3 +401,126 @@ unsigned long get_free_hugepages(void) fclose(f); return fhp; } + +bool check_vmflag_io(void *addr) +{ + char buffer[MAX_LINE_LENGTH]; + const char *flags; + size_t flaglen; + + flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer)); + if (!flags) + ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr); + + while (true) { + flags += strspn(flags, " "); + + flaglen = strcspn(flags, " "); + if (!flaglen) + return false; + + if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen)) + return true; + + flags += flaglen; + } +} + +/* + * Open an fd at /proc/$pid/maps and configure procmap_out ready for + * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. + */ +int open_procmap(pid_t pid, struct procmap_fd *procmap_out) +{ + char path[256]; + int ret = 0; + + memset(procmap_out, '\0', sizeof(*procmap_out)); + sprintf(path, "/proc/%d/maps", pid); + procmap_out->query.size = sizeof(procmap_out->query); + procmap_out->fd = open(path, O_RDONLY); + if (procmap_out->fd < 0) + ret = -errno; + + return ret; +} + +/* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */ +int query_procmap(struct procmap_fd *procmap) +{ + int ret = 0; + + if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1) + ret = -errno; + + return ret; +} + +/* + * Try to find the VMA at specified address, returns true if found, false if not + * found, and the test is failed if any other error occurs. + * + * On success, procmap->query is populated with the results. + */ +bool find_vma_procmap(struct procmap_fd *procmap, void *address) +{ + int err; + + procmap->query.query_flags = 0; + procmap->query.query_addr = (unsigned long)address; + err = query_procmap(procmap); + if (!err) + return true; + + if (err != -ENOENT) + ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n", + __func__, err); + return false; +} + +/* + * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error + * code otherwise. + */ +int close_procmap(struct procmap_fd *procmap) +{ + return close(procmap->fd); +} + +int write_sysfs(const char *file_path, unsigned long val) +{ + FILE *f = fopen(file_path, "w"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fprintf(f, "%lu", val) < 0) { + perror("fprintf"); + fclose(f); + return 1; + } + fclose(f); + + return 0; +} + +int read_sysfs(const char *file_path, unsigned long *val) +{ + FILE *f = fopen(file_path, "r"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fscanf(f, "%lu", val) != 1) { + perror("fscanf"); + fclose(f); + return 1; + } + fclose(f); + + return 0; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9007c420d52c..b8136d12a0f8 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,13 +3,17 @@ #include <stdbool.h> #include <sys/mman.h> #include <err.h> +#include <stdarg.h> #include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ +#include "../kselftest.h" +#include <linux/fs.h> #define BIT_ULL(nr) (1ULL << (nr)) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) @@ -17,6 +21,15 @@ extern unsigned int __page_size; extern unsigned int __page_shift; +/* + * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl) + * /proc/$pid/[s]maps lookup. + */ +struct procmap_fd { + int fd; + struct procmap_query query; +}; + static inline unsigned int psize(void) { if (!__page_size) @@ -31,6 +44,23 @@ static inline unsigned int pshift(void) return __page_shift; } +/* + * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with + * ENOENT on unlinked files. See + * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such + * bugs. There are rumours of NFS implementations with similar bugs. + * + * Ideally, tests should just detect filesystems known to have such issues and + * bail early. But 9pfs has the additional "feature" that it causes fstatfs to + * pass through the f_type field from the host filesystem. To avoid having to + * scrape /proc/mounts or some other hackery, tests can call this function when + * it seems such a bug might have been encountered. + */ +static inline void skip_test_dodgy_fs(const char *op_name) +{ + ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name); +} + uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); @@ -39,6 +69,7 @@ unsigned long pagemap_get_pfn(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); +unsigned long rss_anon(void); bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); @@ -52,6 +83,39 @@ int uffd_unregister(int uffd, void *addr, uint64_t len); int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); +bool check_vmflag_io(void *addr); +int open_procmap(pid_t pid, struct procmap_fd *procmap_out); +int query_procmap(struct procmap_fd *procmap); +bool find_vma_procmap(struct procmap_fd *procmap, void *address); +int close_procmap(struct procmap_fd *procmap); +int write_sysfs(const char *file_path, unsigned long val); +int read_sysfs(const char *file_path, unsigned long *val); + +static inline int open_self_procmap(struct procmap_fd *procmap_out) +{ + pid_t pid = getpid(); + + return open_procmap(pid, procmap_out); +} + +/* These helpers need to be inline to match the kselftest.h idiom. */ +static char test_name[1024]; + +static inline void log_test_start(const char *name, ...) +{ + va_list args; + va_start(args, name); + + vsnprintf(test_name, sizeof(test_name), name, args); + ksft_print_msg("[RUN] %s\n", test_name); + + va_end(args); +} + +static inline void log_test_result(int result) +{ + ksft_test_result_report(result, "%s\n", test_name); +} /* * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c index 6a2caba19ee1..34c91f7e6128 100644 --- a/tools/testing/selftests/mm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c @@ -28,7 +28,7 @@ enum method { /* Global variables. */ static const char *self; -static char *shmaddr; +static int *shmaddr; static int shmid; /* @@ -47,15 +47,17 @@ void sig_handler(int signo) { printf("Received %d.\n", signo); if (signo == SIGINT) { - printf("Deleting the memory\n"); - if (shmdt((const void *)shmaddr) != 0) { - perror("Detach failure"); + if (shmaddr) { + printf("Deleting the memory\n"); + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + shmctl(shmid, IPC_RMID, NULL); - exit(4); + printf("Done deleting the memory\n"); } - - shmctl(shmid, IPC_RMID, NULL); - printf("Done deleting the memory\n"); } exit(2); } @@ -87,7 +89,7 @@ int main(int argc, char **argv) size = atoi(optarg); break; case 'p': - strncpy(path, optarg, sizeof(path)); + strncpy(path, optarg, sizeof(path) - 1); break; case 'm': if (atoi(optarg) >= MAX_METHOD) { @@ -211,7 +213,8 @@ int main(int argc, char **argv) shmctl(shmid, IPC_RMID, NULL); exit(2); } - printf("shmaddr: %p\n", ptr); + shmaddr = ptr; + printf("shmaddr: %p\n", shmaddr); break; default: |