diff options
Diffstat (limited to 'tools/testing/selftests/mm/uffd-stress.c')
| -rw-r--r-- | tools/testing/selftests/mm/uffd-stress.c | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c new file mode 100644 index 000000000000..700fbaa18d44 --- /dev/null +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Stress userfaultfd syscall. + * + * Copyright (C) 2015 Red Hat, Inc. + * + * This test allocates two virtual areas and bounces the physical + * memory across the two virtual areas (from area_src to area_dst) + * using userfaultfd. + * + * There are three threads running per CPU: + * + * 1) one per-CPU thread takes a per-page pthread_mutex in a random + * page of the area_dst (while the physical page may still be in + * area_src), and increments a per-page counter in the same page, + * and checks its value against a verification region. + * + * 2) another per-CPU thread handles the userfaults generated by + * thread 1 above. userfaultfd blocking reads or poll() modes are + * exercised interleaved. + * + * 3) one last per-CPU thread transfers the memory in the background + * at maximum bandwidth (if not already transferred by thread + * 2). Each cpu thread takes cares of transferring a portion of the + * area. + * + * When all threads of type 3 completed the transfer, one bounce is + * complete. area_src and area_dst are then swapped. All threads are + * respawned and so the bounce is immediately restarted in the + * opposite direction. + * + * per-CPU threads 1 by triggering userfaults inside + * pthread_mutex_lock will also verify the atomicity of the memory + * transfer (UFFDIO_COPY). + */ + +#include "uffd-common.h" + +uint64_t features; +#ifdef __NR_userfaultfd + +#define BOUNCE_RANDOM (1<<0) +#define BOUNCE_RACINGFAULTS (1<<1) +#define BOUNCE_VERIFY (1<<2) +#define BOUNCE_POLL (1<<3) +static int bounces; +/* defined globally for this particular test as the sigalrm handler + * depends on test_uffdio_*_eexist. + * XXX: define gopts in main() when we figure out a way to deal with + * test_uffdio_*_eexist. + */ +static uffd_global_test_opts_t *gopts; + +/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ +#define ALARM_INTERVAL_SECS 10 +static char *zeropage; +pthread_attr_t attr; + +#define swap(a, b) \ + do { __auto_type __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +const char *examples = + "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" + "./uffd-stress anon 100 99999\n\n" + "# Run share memory test on 1GiB region with 99 bounces:\n" + "./uffd-stress shmem 1000 99\n\n" + "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" + "./uffd-stress hugetlb 256 50\n\n" + "# Run the same hugetlb test but using private file:\n" + "./uffd-stress hugetlb-private 256 50\n\n" + "# 10MiB-~6GiB 999 bounces anonymous test, " + "continue forever unless an error triggers\n" + "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; + +static void usage(void) +{ + fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n"); + fprintf(stderr, "Supported <test type>: anon, hugetlb, " + "hugetlb-private, shmem, shmem-private\n\n"); + fprintf(stderr, "Examples:\n\n"); + fprintf(stderr, "%s", examples); + exit(1); +} + +static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args, + unsigned long n_cpus) +{ + int i; + + for (i = 0; i < n_cpus; i++) { + args[i].cpu = i; + args[i].apply_wp = gopts->test_uffdio_wp; + args[i].missing_faults = 0; + args[i].wp_faults = 0; + args[i].minor_faults = 0; + args[i].gopts = gopts; + } +} + +static void *locking_thread(void *arg) +{ + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; + unsigned long page_nr; + unsigned long long count; + + if (!(bounces & BOUNCE_RANDOM)) { + page_nr = -bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + page_nr += cpu * gopts->nr_pages_per_cpu; + } + + while (!gopts->finished) { + if (bounces & BOUNCE_RANDOM) { + if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) + err("getrandom failed"); + } else + page_nr += 1; + page_nr %= gopts->nr_pages; + pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts)); + count = *area_count(gopts->area_dst, page_nr, gopts); + if (count != gopts->count_verify[page_nr]) + err("page_nr %lu memory corruption %llu %llu", + page_nr, count, gopts->count_verify[page_nr]); + count++; + *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts)); + } + + return NULL; +} + +static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset) +{ + return __copy_page(gopts, offset, true, gopts->test_uffdio_wp); +} + +pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void *uffd_read_thread(void *arg) +{ + struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; + struct uffd_msg msg; + + pthread_mutex_unlock(&uffd_read_mutex); + /* from here cancellation is ok */ + + for (;;) { + if (uffd_read_msg(gopts, &msg)) + continue; + uffd_handle_page_fault(gopts, &msg, args); + } + + return NULL; +} + +static void *background_thread(void *arg) +{ + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; + unsigned long page_nr, start_nr, mid_nr, end_nr; + + start_nr = cpu * gopts->nr_pages_per_cpu; + end_nr = (cpu+1) * gopts->nr_pages_per_cpu; + mid_nr = (start_nr + end_nr) / 2; + + /* Copy the first half of the pages */ + for (page_nr = start_nr; page_nr < mid_nr; page_nr++) + copy_page_retry(gopts, page_nr * gopts->page_size); + + /* + * If we need to test uffd-wp, set it up now. Then we'll have + * at least the first half of the pages mapped already which + * can be write-protected for testing + */ + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size, + gopts->nr_pages_per_cpu * gopts->page_size, true); + + /* + * Continue the 2nd half of the page copying, handling write + * protection faults if any + */ + for (page_nr = mid_nr; page_nr < end_nr; page_nr++) + copy_page_retry(gopts, page_nr * gopts->page_size); + + return NULL; +} + +static int stress(struct uffd_args *args) +{ + unsigned long cpu; + uffd_global_test_opts_t *gopts = args->gopts; + pthread_t locking_threads[gopts->nr_parallel]; + pthread_t uffd_threads[gopts->nr_parallel]; + pthread_t background_threads[gopts->nr_parallel]; + + gopts->finished = 0; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { + if (pthread_create(&locking_threads[cpu], &attr, + locking_thread, (void *)&args[cpu])) + return 1; + if (bounces & BOUNCE_POLL) { + if (pthread_create(&uffd_threads[cpu], + &attr, + uffd_poll_thread, + (void *) &args[cpu])) + err("uffd_poll_thread create"); + } else { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_read_thread, + (void *)&args[cpu])) + return 1; + pthread_mutex_lock(&uffd_read_mutex); + } + if (pthread_create(&background_threads[cpu], &attr, + background_thread, (void *)&args[cpu])) + return 1; + } + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pthread_join(background_threads[cpu], NULL)) + return 1; + + /* + * Be strict and immediately zap area_src, the whole area has + * been transferred already by the background treads. The + * area_src could then be faulted in a racy way by still + * running uffdio_threads reading zeropages after we zapped + * area_src (but they're guaranteed to get -EEXIST from + * UFFDIO_COPY without writing zero pages into area_dst + * because the background threads already completed). + */ + uffd_test_ops->release_pages(gopts, gopts->area_src); + + gopts->finished = 1; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { + char c = '\0'; + if (bounces & BOUNCE_POLL) { + if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1) + err("pipefd write error"); + if (pthread_join(uffd_threads[cpu], + (void *)&args[cpu])) + return 1; + } else { + if (pthread_cancel(uffd_threads[cpu])) + return 1; + if (pthread_join(uffd_threads[cpu], NULL)) + return 1; + } + } + + return 0; +} + +static int userfaultfd_stress(uffd_global_test_opts_t *gopts) +{ + void *area; + unsigned long nr; + struct uffd_args args[gopts->nr_parallel]; + uint64_t mem_size = gopts->nr_pages * gopts->page_size; + int flags = 0; + + memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel); + + if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON) + flags = UFFD_FEATURE_WP_UNPOPULATED; + + if (uffd_test_ctx_init(gopts, flags, NULL)) + err("context init failed"); + + if (posix_memalign(&area, gopts->page_size, gopts->page_size)) + err("out of memory"); + zeropage = area; + bzero(zeropage, gopts->page_size); + + pthread_mutex_lock(&uffd_read_mutex); + + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 16*1024*1024); + + while (bounces--) { + printf("bounces: %d, mode:", bounces); + if (bounces & BOUNCE_RANDOM) + printf(" rnd"); + if (bounces & BOUNCE_RACINGFAULTS) + printf(" racing"); + if (bounces & BOUNCE_VERIFY) + printf(" ver"); + if (bounces & BOUNCE_POLL) + printf(" poll"); + else + printf(" read"); + printf(", "); + fflush(stdout); + + if (bounces & BOUNCE_POLL) + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); + else + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK); + + /* register */ + if (uffd_register(gopts->uffd, gopts->area_dst, mem_size, + true, gopts->test_uffdio_wp, false)) + err("register failure"); + + if (gopts->area_dst_alias) { + if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size, + true, gopts->test_uffdio_wp, false)) + err("register failure alias"); + } + + /* + * The madvise done previously isn't enough: some + * uffd_thread could have read userfaults (one of + * those already resolved by the background thread) + * and it may be in the process of calling + * UFFDIO_COPY. UFFDIO_COPY will read the zapped + * area_src and it would map a zero page in it (of + * course such a UFFDIO_COPY is perfectly safe as it'd + * return -EEXIST). The problem comes at the next + * bounce though: that racing UFFDIO_COPY would + * generate zeropages in the area_src, so invalidating + * the previous MADV_DONTNEED. Without this additional + * MADV_DONTNEED those zeropages leftovers in the + * area_src would lead to -EEXIST failure during the + * next bounce, effectively leaving a zeropage in the + * area_dst. + * + * Try to comment this out madvise to see the memory + * corruption being caught pretty quick. + * + * khugepaged is also inhibited to collapse THP after + * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's + * required to MADV_DONTNEED here. + */ + uffd_test_ops->release_pages(gopts, gopts->area_dst); + + uffd_stats_reset(gopts, args, gopts->nr_parallel); + + /* bounce pass */ + if (stress(args)) { + uffd_test_ctx_clear(gopts); + return 1; + } + + /* Clear all the write protections if there is any */ + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst, + gopts->nr_pages * gopts->page_size, false); + + /* unregister */ + if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size)) + err("unregister failure"); + if (gopts->area_dst_alias) { + if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size)) + err("unregister failure alias"); + } + + /* verification */ + if (bounces & BOUNCE_VERIFY) + for (nr = 0; nr < gopts->nr_pages; nr++) + if (*area_count(gopts->area_dst, nr, gopts) != + gopts->count_verify[nr]) + err("error area_count %llu %llu %lu\n", + *area_count(gopts->area_src, nr, gopts), + gopts->count_verify[nr], nr); + + /* prepare next bounce */ + swap(gopts->area_src, gopts->area_dst); + + swap(gopts->area_src_alias, gopts->area_dst_alias); + + uffd_stats_report(args, gopts->nr_parallel); + } + uffd_test_ctx_clear(gopts); + + return 0; +} + +static void set_test_type(uffd_global_test_opts_t *gopts, const char *type) +{ + if (!strcmp(type, "anon")) { + gopts->test_type = TEST_ANON; + uffd_test_ops = &anon_uffd_test_ops; + } else if (!strcmp(type, "hugetlb")) { + gopts->test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; + gopts->map_shared = true; + } else if (!strcmp(type, "hugetlb-private")) { + gopts->test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; + } else if (!strcmp(type, "shmem")) { + gopts->map_shared = true; + gopts->test_type = TEST_SHMEM; + uffd_test_ops = &shmem_uffd_test_ops; + } else if (!strcmp(type, "shmem-private")) { + gopts->test_type = TEST_SHMEM; + uffd_test_ops = &shmem_uffd_test_ops; + } +} + +static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type) +{ + set_test_type(gopts, raw_type); + + if (!gopts->test_type) + err("failed to parse test type argument: '%s'", raw_type); + + if (gopts->test_type == TEST_HUGETLB) + gopts->page_size = default_huge_page_size(); + else + gopts->page_size = sysconf(_SC_PAGE_SIZE); + + if (!gopts->page_size) + err("Unable to determine page size"); + if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2 + > gopts->page_size) + err("Impossible to run this test"); + + /* + * Whether we can test certain features depends not just on test type, + * but also on whether or not this particular kernel supports the + * feature. + */ + + if (uffd_get_features(&features) && errno == ENOENT) + ksft_exit_skip("failed to get available features (%d)\n", errno); + + gopts->test_uffdio_wp = gopts->test_uffdio_wp && + (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); + + if (gopts->test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + gopts->test_uffdio_wp = false; + + close(gopts->uffd); + gopts->uffd = -1; +} + +static void sigalrm(int sig) +{ + if (sig != SIGALRM) + abort(); + gopts->test_uffdio_copy_eexist = true; + alarm(ALARM_INTERVAL_SECS); +} + +int main(int argc, char **argv) +{ + unsigned long nr_cpus; + size_t bytes; + + gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t)); + + if (argc < 4) + usage(); + + if (signal(SIGALRM, sigalrm) == SIG_ERR) + err("failed to arm SIGALRM"); + alarm(ALARM_INTERVAL_SECS); + + parse_test_type_arg(gopts, argv[1]); + bytes = atol(argv[2]) * 1024 * 1024; + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus > 32) { + /* Don't let calculation below go to zero. */ + ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", + nr_cpus); + gopts->nr_parallel = 32; + } else { + gopts->nr_parallel = nr_cpus; + } + + /* + * src and dst each require bytes / page_size number of hugepages. + * Ensure nr_parallel - 1 hugepages on top of that to account + * for racy extra reservation of hugepages. + */ + if (gopts->test_type == TEST_HUGETLB && + get_free_hugepages() < 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1) { + printf("skip: Skipping userfaultfd... not enough hugepages\n"); + return KSFT_SKIP; + } + + gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel; + if (!gopts->nr_pages_per_cpu) { + _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", + bytes, gopts->page_size, gopts->nr_parallel); + usage(); + } + + bounces = atoi(argv[3]); + if (bounces <= 0) { + _err("invalid bounces"); + usage(); + } + gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel; + + printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", + gopts->nr_pages, gopts->nr_pages_per_cpu); + return userfaultfd_stress(gopts); +} + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ |
