diff options
Diffstat (limited to 'tools/testing/selftests/mm/hmm-tests.c')
| -rw-r--r-- | tools/testing/selftests/mm/hmm-tests.c | 2855 |
1 files changed, 2855 insertions, 0 deletions
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c new file mode 100644 index 000000000000..e8328c89d855 --- /dev/null +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -0,0 +1,2855 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HMM stands for Heterogeneous Memory Management, it is a helper layer inside + * the linux kernel to help device drivers mirror a process address space in + * the device. This allows the device to use the same address space which + * makes communication and data exchange a lot easier. + * + * This framework's sole purpose is to exercise various code paths inside + * the kernel to make sure that HMM performs as expected and to flush out any + * bugs. + */ + +#include "kselftest_harness.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <strings.h> +#include <time.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/time.h> + + +/* + * This is a private UAPI to the kernel test module so it isn't exported + * in the usual include/uapi/... directory. + */ +#include <lib/test_hmm_uapi.h> +#include <mm/gup_test.h> + +struct hmm_buffer { + void *ptr; + void *mirror; + unsigned long size; + int fd; + uint64_t cpages; + uint64_t faults; +}; + +enum { + HMM_PRIVATE_DEVICE_ONE, + HMM_PRIVATE_DEVICE_TWO, + HMM_COHERENCE_DEVICE_ONE, + HMM_COHERENCE_DEVICE_TWO, +}; + +#define ONEKB (1 << 10) +#define ONEMEG (1 << 20) +#define TWOMEG (1 << 21) +#define HMM_BUFFER_SIZE (1024 << 12) +#define HMM_PATH_MAX 64 +#define NTIMES 10 + +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +/* Just the flags we need, copied from mm.h: */ + +#ifndef FOLL_WRITE +#define FOLL_WRITE 0x01 /* check pte is writable */ +#endif + +#ifndef FOLL_LONGTERM +#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */ +#endif +FIXTURE(hmm) +{ + int fd; + unsigned int page_size; + unsigned int page_shift; +}; + +FIXTURE_VARIANT(hmm) +{ + int device_number; +}; + +FIXTURE_VARIANT_ADD(hmm, hmm_device_private) +{ + .device_number = HMM_PRIVATE_DEVICE_ONE, +}; + +FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent) +{ + .device_number = HMM_COHERENCE_DEVICE_ONE, +}; + +FIXTURE(hmm2) +{ + int fd0; + int fd1; + unsigned int page_size; + unsigned int page_shift; +}; + +FIXTURE_VARIANT(hmm2) +{ + int device_number0; + int device_number1; +}; + +FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private) +{ + .device_number0 = HMM_PRIVATE_DEVICE_ONE, + .device_number1 = HMM_PRIVATE_DEVICE_TWO, +}; + +FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent) +{ + .device_number0 = HMM_COHERENCE_DEVICE_ONE, + .device_number1 = HMM_COHERENCE_DEVICE_TWO, +}; + +static int hmm_open(int unit) +{ + char pathname[HMM_PATH_MAX]; + int fd; + + snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit); + fd = open(pathname, O_RDWR, 0); + if (fd < 0) + fprintf(stderr, "could not open hmm dmirror driver (%s)\n", + pathname); + return fd; +} + +static bool hmm_is_coherent_type(int dev_num) +{ + return (dev_num >= HMM_COHERENCE_DEVICE_ONE); +} + +FIXTURE_SETUP(hmm) +{ + self->page_size = sysconf(_SC_PAGE_SIZE); + self->page_shift = ffs(self->page_size) - 1; + + self->fd = hmm_open(variant->device_number); + if (self->fd < 0 && hmm_is_coherent_type(variant->device_number)) + SKIP(return, "DEVICE_COHERENT not available"); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_SETUP(hmm2) +{ + self->page_size = sysconf(_SC_PAGE_SIZE); + self->page_shift = ffs(self->page_size) - 1; + + self->fd0 = hmm_open(variant->device_number0); + if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0)) + SKIP(return, "DEVICE_COHERENT not available"); + ASSERT_GE(self->fd0, 0); + self->fd1 = hmm_open(variant->device_number1); + ASSERT_GE(self->fd1, 0); +} + +FIXTURE_TEARDOWN(hmm) +{ + int ret = close(self->fd); + + ASSERT_EQ(ret, 0); + self->fd = -1; +} + +FIXTURE_TEARDOWN(hmm2) +{ + int ret = close(self->fd0); + + ASSERT_EQ(ret, 0); + self->fd0 = -1; + + ret = close(self->fd1); + ASSERT_EQ(ret, 0); + self->fd1 = -1; +} + +static int hmm_dmirror_cmd(int fd, + unsigned long request, + struct hmm_buffer *buffer, + unsigned long npages) +{ + struct hmm_dmirror_cmd cmd; + int ret; + + /* Simulate a device reading system memory. */ + cmd.addr = (__u64)buffer->ptr; + cmd.ptr = (__u64)buffer->mirror; + cmd.npages = npages; + + for (;;) { + ret = ioctl(fd, request, &cmd); + if (ret == 0) + break; + if (errno == EINTR) + continue; + return -errno; + } + buffer->cpages = cmd.cpages; + buffer->faults = cmd.faults; + + return 0; +} + +static void hmm_buffer_free(struct hmm_buffer *buffer) +{ + if (buffer == NULL) + return; + + if (buffer->ptr) { + munmap(buffer->ptr, buffer->size); + buffer->ptr = NULL; + } + free(buffer->mirror); + free(buffer); +} + +/* + * Create a temporary file that will be deleted on close. + */ +static int hmm_create_file(unsigned long size) +{ + char path[HMM_PATH_MAX]; + int fd; + + strcpy(path, "/tmp"); + fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600); + if (fd >= 0) { + int r; + + do { + r = ftruncate(fd, size); + } while (r == -1 && errno == EINTR); + if (!r) + return fd; + close(fd); + } + return -1; +} + +/* + * Return a random unsigned number. + */ +static unsigned int hmm_random(void) +{ + static int fd = -1; + unsigned int r; + + if (fd < 0) { + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s:%d failed to open /dev/urandom\n", + __FILE__, __LINE__); + return ~0U; + } + } + read(fd, &r, sizeof(r)); + return r; +} + +static void hmm_nanosleep(unsigned int n) +{ + struct timespec t; + + t.tv_sec = 0; + t.tv_nsec = n; + nanosleep(&t, NULL); +} + +static int hmm_migrate_sys_to_dev(int fd, + struct hmm_buffer *buffer, + unsigned long npages) +{ + return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages); +} + +static int hmm_migrate_dev_to_sys(int fd, + struct hmm_buffer *buffer, + unsigned long npages) +{ + return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages); +} + +/* + * Simple NULL test of device open/close. + */ +TEST_F(hmm, open_close) +{ +} + +/* + * Read private anonymous memory. + */ +TEST_F(hmm, anon_read) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int val; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* + * Initialize buffer in system memory but leave the first two pages + * zero (pte_none and pfn_zero). + */ + i = 2 * self->page_size / sizeof(*ptr); + for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Set buffer permission to read-only. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Populate the CPU page table with a special zero page. */ + val = *(int *)(buffer->ptr + self->page_size); + ASSERT_EQ(val, 0); + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + ptr = buffer->mirror; + for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + for (; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Read private anonymous memory which has been protected with + * mprotect() PROT_NONE. + */ +TEST_F(hmm, anon_read_prot) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Initialize mirror buffer so we can verify it isn't written. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = -i; + + /* Protect buffer from reading. */ + ret = mprotect(buffer->ptr, size, PROT_NONE); + ASSERT_EQ(ret, 0); + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, -EFAULT); + + /* Allow CPU to read the buffer so we can check it. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + hmm_buffer_free(buffer); +} + +/* + * Write private anonymous memory. + */ +TEST_F(hmm, anon_write) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Write private anonymous memory which has been protected with + * mprotect() PROT_READ. + */ +TEST_F(hmm, anon_write_prot) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device reading a zero page of memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 1); + ASSERT_EQ(buffer->faults, 1); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, -EPERM); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + + /* Now allow writing and see that the zero page is replaced. */ + ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Check that a device writing an anonymous private mapping + * will copy-on-write if a child process inherits the mapping. + * + * Also verifies after fork() memory the device can be read by child. + */ +TEST_F(hmm, anon_write_child) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + pid_t pid; + int child_fd; + int ret, use_thp, migrate; + + for (migrate = 0; migrate < 2; ++migrate) { + for (use_thp = 0; use_thp < 2; ++use_thp) { + npages = ALIGN(use_thp ? TWOMEG : HMM_BUFFER_SIZE, + self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size * 2; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size * 2, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + old_ptr = buffer->ptr; + if (use_thp) { + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + buffer->ptr = map; + } + + /* Initialize buffer->ptr so we can tell if it is written. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = -i; + + if (migrate) { + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + } + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (pid != 0) { + waitpid(pid, &ret, 0); + ASSERT_EQ(WIFEXITED(ret), 1); + + /* Check that the parent's buffer did not change. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); + continue; + } + + /* Check that we see the parent's values. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + if (!migrate) { + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + } + + /* The child process needs its own mirror to its own mm. */ + child_fd = hmm_open(0); + ASSERT_GE(child_fd, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + if (!migrate) { + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + } + + close(child_fd); + exit(0); + } + } +} + +/* + * Check that a device writing an anonymous shared mapping + * will not copy-on-write if a child process inherits the mapping. + */ +TEST_F(hmm, anon_write_child_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + pid_t pid; + int child_fd; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer->ptr so we can tell if it is written. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = -i; + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (pid != 0) { + waitpid(pid, &ret, 0); + ASSERT_EQ(WIFEXITED(ret), 1); + + /* Check that the parent's buffer did change. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + return; + } + + /* Check that we see the parent's values. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + /* The child process needs its own mirror to its own mm. */ + child_fd = hmm_open(0); + ASSERT_GE(child_fd, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + close(child_fd); + exit(0); +} + +/* + * Write private anonymous huge page. + */ +TEST_F(hmm, anon_write_huge) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = 2 * TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + size = TWOMEG; + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Read numeric data from raw and tagged kernel status files. Used to read + * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag). + */ +static long file_read_ulong(char *file, const char *tag) +{ + int fd; + char buf[2048]; + int len; + char *p, *q; + long val; + + fd = open(file, O_RDONLY); + if (fd < 0) { + /* Error opening the file */ + return -1; + } + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) { + /* Error in reading the file */ + return -1; + } + if (len == sizeof(buf)) { + /* Error file is too large */ + return -1; + } + buf[len] = '\0'; + + /* Search for a tag if provided */ + if (tag) { + p = strstr(buf, tag); + if (!p) + return -1; /* looks like the line we want isn't there */ + p += strlen(tag); + } else + p = buf; + + val = strtol(p, &q, 0); + if (*q != ' ') { + /* Error parsing the file */ + return -1; + } + + return val; +} + +/* + * Write huge TLBFS page. + */ +TEST_F(hmm, anon_write_hugetlbfs) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long default_hsize; + unsigned long i; + int *ptr; + int ret; + + default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:"); + if (default_hsize < 0 || default_hsize*1024 < default_hsize) + SKIP(return, "Huge page size could not be determined"); + default_hsize = default_hsize*1024; /* KB to B */ + + size = ALIGN(TWOMEG, default_hsize); + npages = size >> self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (buffer->ptr == MAP_FAILED) { + free(buffer); + SKIP(return, "Huge page could not be allocated"); + } + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + munmap(buffer->ptr, buffer->size); + buffer->ptr = NULL; + hmm_buffer_free(buffer); +} + +/* + * Read mmap'ed file memory. + */ +TEST_F(hmm, file_read) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + ssize_t len; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Write initial contents of the file. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + len = pwrite(fd, buffer->mirror, size, 0); + ASSERT_EQ(len, size); + memset(buffer->mirror, 0, size); + + buffer->ptr = mmap(NULL, size, + PROT_READ, + MAP_SHARED, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Write mmap'ed file memory. + */ +TEST_F(hmm, file_write) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + ssize_t len; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Check that the device also wrote the file. */ + len = pread(fd, buffer->mirror, size, 0); + ASSERT_EQ(len, size); + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous memory to device private memory. + */ +TEST_F(hmm, migrate) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous memory to device private memory and fault some of it back + * to system memory, then try migrating the resulting mix of system and device + * private memory to the device. + */ +TEST_F(hmm, migrate_fault) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Fault half the pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) + ASSERT_EQ(ptr[i], i); + + /* Migrate memory to the device again. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +TEST_F(hmm, migrate_release) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Release device memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages); + ASSERT_EQ(ret, 0); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous shared memory to device private memory. + */ +TEST_F(hmm, migrate_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, -ENOENT); + + hmm_buffer_free(buffer); +} + +/* + * Try to migrate various memory types to device private memory. + */ +TEST_F(hmm2, migrate_mixed) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int *ptr; + unsigned char *p; + int ret; + int val; + + npages = 6; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + p = buffer->ptr; + + /* Migrating a protected area should be an error. */ + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages); + ASSERT_EQ(ret, -EINVAL); + + /* Punch a hole after the first page address. */ + ret = munmap(buffer->ptr + self->page_size, self->page_size); + ASSERT_EQ(ret, 0); + + /* We expect an error if the vma doesn't cover the range. */ + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3); + ASSERT_EQ(ret, -EINVAL); + + /* Page 2 will be a read-only zero page. */ + ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 2 * self->page_size); + val = *ptr + 3; + ASSERT_EQ(val, 3); + + /* Page 3 will be read-only. */ + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 3 * self->page_size); + *ptr = val; + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + + /* Page 4-5 will be read-write. */ + ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 4 * self->page_size); + *ptr = val; + ptr = (int *)(buffer->ptr + 5 * self->page_size); + *ptr = val; + + /* Now try to migrate pages 2-5 to device 1. */ + buffer->ptr = p + 2 * self->page_size; + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 4); + + /* Page 5 won't be migrated to device 0 because it's on device 1. */ + buffer->ptr = p + 5 * self->page_size; + ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1); + ASSERT_EQ(ret, -ENOENT); + buffer->ptr = p; + + buffer->ptr = p; + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous memory to device memory and back to system memory + * multiple times. In case of private zone configuration, this is done + * through fault pages accessed by CPU. In case of coherent zone configuration, + * the pages from the device should be explicitly migrated back to system memory. + * The reason is Coherent device zone has coherent access by CPU, therefore + * it will not generate any page fault. + */ +TEST_F(hmm, migrate_multiple) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + unsigned long c; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + for (c = 0; c < NTIMES; c++) { + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Migrate back to system memory and check them. */ + if (hmm_is_coherent_type(variant->device_number)) { + ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + } + + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); + } +} + +/* + * Read anonymous memory multiple times. + */ +TEST_F(hmm, anon_read_multiple) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + unsigned long c; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + for (c = 0; c < NTIMES; c++) { + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i + c; + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, + npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i + c); + + hmm_buffer_free(buffer); + } +} + +void *unmap_buffer(void *p) +{ + struct hmm_buffer *buffer = p; + + /* Delay for a bit and then unmap buffer while it is being read. */ + hmm_nanosleep(hmm_random() % 32000); + munmap(buffer->ptr + buffer->size / 2, buffer->size / 2); + buffer->ptr = NULL; + + return NULL; +} + +/* + * Try reading anonymous memory while it is being unmapped. + */ +TEST_F(hmm, anon_teardown) +{ + unsigned long npages; + unsigned long size; + unsigned long c; + void *ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + for (c = 0; c < NTIMES; ++c) { + pthread_t thread; + struct hmm_buffer *buffer; + unsigned long i; + int *ptr; + int rc; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i + c; + + rc = pthread_create(&thread, NULL, unmap_buffer, buffer); + ASSERT_EQ(rc, 0); + + /* Simulate a device reading system memory. */ + rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, + npages); + if (rc == 0) { + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; + i < size / sizeof(*ptr); + ++i) + ASSERT_EQ(ptr[i], i + c); + } + + pthread_join(thread, &ret); + hmm_buffer_free(buffer); + } +} + +/* + * Test memory snapshot without faulting in pages accessed by the device. + */ +TEST_F(hmm, mixedmap) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned char *m; + int ret; + + npages = 1; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + self->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ); + + hmm_buffer_free(buffer); +} + +/* + * Test memory snapshot without faulting in pages accessed by the device. + */ +TEST_F(hmm2, snapshot) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int *ptr; + unsigned char *p; + unsigned char *m; + int ret; + int val; + + npages = 7; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + p = buffer->ptr; + + /* Punch a hole after the first page address. */ + ret = munmap(buffer->ptr + self->page_size, self->page_size); + ASSERT_EQ(ret, 0); + + /* Page 2 will be read-only zero page. */ + ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 2 * self->page_size); + val = *ptr + 3; + ASSERT_EQ(val, 3); + + /* Page 3 will be read-only. */ + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 3 * self->page_size); + *ptr = val; + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + + /* Page 4-6 will be read-write. */ + ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 4 * self->page_size); + *ptr = val; + + /* Page 5 will be migrated to device 0. */ + buffer->ptr = p + 5 * self->page_size; + ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 1); + + /* Page 6 will be migrated to device 1. */ + buffer->ptr = p + 6 * self->page_size; + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 1); + + /* Simulate a device snapshotting CPU pagetables. */ + buffer->ptr = p; + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR); + ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR); + ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); + ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); + ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); + if (!hmm_is_coherent_type(variant->device_number0)) { + ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | + HMM_DMIRROR_PROT_WRITE); + ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); + } else { + ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | + HMM_DMIRROR_PROT_WRITE); + ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE | + HMM_DMIRROR_PROT_WRITE); + } + + hmm_buffer_free(buffer); +} + +/* + * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that + * should be mapped by a large page table entry. + */ +TEST_F(hmm, compound) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long default_hsize; + int *ptr; + unsigned char *m; + int ret; + unsigned long i; + + /* Skip test if we can't allocate a hugetlbfs page. */ + + default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:"); + if (default_hsize < 0 || default_hsize*1024 < default_hsize) + SKIP(return, "Huge page size could not be determined"); + default_hsize = default_hsize*1024; /* KB to B */ + + size = ALIGN(TWOMEG, default_hsize); + npages = size >> self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (buffer->ptr == MAP_FAILED) { + free(buffer); + return; + } + + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + /* Initialize the pages the device will snapshot in buffer->ptr. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + for (i = 0; i < npages; ++i) + ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE | + HMM_DMIRROR_PROT_PMD); + + /* Make the region read-only. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + for (i = 0; i < npages; ++i) + ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ | + HMM_DMIRROR_PROT_PMD); + + munmap(buffer->ptr, buffer->size); + buffer->ptr = NULL; + hmm_buffer_free(buffer); +} + +/* + * Test two devices reading the same memory (double mapped). + */ +TEST_F(hmm2, double_map) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = 6; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Make region read-only. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate device 0 reading system memory. */ + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Simulate device 1 reading system memory. */ + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Migrate pages to device 1 and try to read from device 0. */ + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what device 0 read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Basic check of exclusive faulting. + */ +TEST_F(hmm, exclusive) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Map memory exclusively for device access. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i]++, i); + + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i+1); + + /* Check atomic access revoked */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages); + ASSERT_EQ(ret, 0); + + hmm_buffer_free(buffer); +} + +TEST_F(hmm, exclusive_mprotect) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Map memory exclusively for device access. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, -EPERM); + + hmm_buffer_free(buffer); +} + +/* + * Check copy-on-write works. + */ +TEST_F(hmm, exclusive_cow) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Map memory exclusively for device access. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + fork(); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i]++, i); + + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i+1); + + hmm_buffer_free(buffer); +} + +static int gup_test_exec(int gup_fd, unsigned long addr, int cmd, + int npages, int size, int flags) +{ + struct gup_test gup = { + .nr_pages_per_call = npages, + .addr = addr, + .gup_flags = FOLL_WRITE | flags, + .size = size, + }; + + if (ioctl(gup_fd, cmd, &gup)) { + perror("ioctl on error\n"); + return errno; + } + + return 0; +} + +/* + * Test get user device pages through gup_test. Setting PIN_LONGTERM flag. + * This should trigger a migration back to system memory for both, private + * and coherent type pages. + * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added + * to your configuration before you run it. + */ +TEST_F(hmm, hmm_gup_test) +{ + struct hmm_buffer *buffer; + int gup_fd; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + unsigned char *m; + + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + if (gup_fd == -1) + SKIP(return, "Skipping test, could not find gup_test driver"); + + npages = 4; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr, + GUP_BASIC_TEST, 1, self->page_size, 0), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 1 * self->page_size, + GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 2 * self->page_size, + PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 3 * self->page_size, + PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0); + + /* Take snapshot to CPU pagetables */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + m = buffer->mirror; + if (hmm_is_coherent_type(variant->device_number)) { + ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]); + ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]); + } else { + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]); + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]); + } + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]); + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]); + /* + * Check again the content on the pages. Make sure there's no + * corrupted data. + */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + close(gup_fd); + hmm_buffer_free(buffer); +} + +/* + * Test copy-on-write in device pages. + * In case of writing to COW private page(s), a page fault will migrate pages + * back to system memory first. Then, these pages will be duplicated. In case + * of COW device coherent type, pages are duplicated directly from device + * memory. + */ +TEST_F(hmm, hmm_cow_in_device) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + unsigned char *m; + pid_t pid; + int status; + + npages = 4; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (!pid) { + /* Child process waits for SIGTERM from the parent. */ + while (1) { + } + /* Should not reach this */ + } + /* Parent process writes to COW pages(s) and gets a + * new copy in system. In case of device private pages, + * this write causes a migration to system mem first. + */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Terminate child and wait */ + EXPECT_EQ(0, kill(pid, SIGTERM)); + EXPECT_EQ(pid, waitpid(pid, &status, 0)); + EXPECT_NE(0, WIFSIGNALED(status)); + EXPECT_EQ(SIGTERM, WTERMSIG(status)); + + /* Take snapshot to CPU pagetables */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + m = buffer->mirror; + for (i = 0; i < npages; i++) + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]); + + hmm_buffer_free(buffer); +} + +/* + * Migrate private anonymous huge empty page. + */ +TEST_F(hmm, migrate_anon_huge_empty) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, size); + + buffer->ptr = mmap(NULL, 2 * size, + PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Migrate private anonymous huge zero page. + */ +TEST_F(hmm, migrate_anon_huge_zero) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + int val; + + size = TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, size); + + buffer->ptr = mmap(NULL, 2 * size, + PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Initialize a read-only zero huge page. */ + val = *(int *)buffer->ptr; + ASSERT_EQ(val, 0); + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) { + ASSERT_EQ(ptr[i], 0); + /* If it asserts once, it probably will 500,000 times */ + if (ptr[i] != 0) + break; + } + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Migrate private anonymous huge page and free. + */ +TEST_F(hmm, migrate_anon_huge_free) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, size); + + buffer->ptr = mmap(NULL, 2 * size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Try freeing it. */ + ret = madvise(map, size, MADV_FREE); + ASSERT_EQ(ret, 0); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Migrate private anonymous huge page and fault back to sysmem. + */ +TEST_F(hmm, migrate_anon_huge_fault) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, size); + + buffer->ptr = mmap(NULL, 2 * size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Migrate memory and fault back to sysmem after partially unmapping. + */ +TEST_F(hmm, migrate_partial_unmap_fault) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size = TWOMEG; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret, j, use_thp; + int offsets[] = { 0, 512 * ONEKB, ONEMEG }; + + for (use_thp = 0; use_thp < 2; ++use_thp) { + for (j = 0; j < ARRAY_SIZE(offsets); ++j) { + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, size); + + buffer->ptr = mmap(NULL, 2 * size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + if (use_thp) + ret = madvise(map, size, MADV_HUGEPAGE); + else + ret = madvise(map, size, MADV_NOHUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + munmap(buffer->ptr + offsets[j], ONEMEG); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + if (i * sizeof(int) < offsets[j] || + i * sizeof(int) >= offsets[j] + ONEMEG) + ASSERT_EQ(ptr[i], i); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); + } + } +} + +TEST_F(hmm, migrate_remap_fault) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size = TWOMEG; + unsigned long i; + void *old_ptr, *new_ptr = NULL; + void *map; + int *ptr; + int ret, j, use_thp, dont_unmap, before; + int offsets[] = { 0, 512 * ONEKB, ONEMEG }; + + for (before = 0; before < 2; ++before) { + for (dont_unmap = 0; dont_unmap < 2; ++dont_unmap) { + for (use_thp = 0; use_thp < 2; ++use_thp) { + for (j = 0; j < ARRAY_SIZE(offsets); ++j) { + int flags = MREMAP_MAYMOVE | MREMAP_FIXED; + + if (dont_unmap) + flags |= MREMAP_DONTUNMAP; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 8 * size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, size); + + buffer->ptr = mmap(NULL, buffer->size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + if (use_thp) + ret = madvise(map, size, MADV_HUGEPAGE); + else + ret = madvise(map, size, MADV_NOHUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + munmap(map + size, size * 2); + buffer->ptr = map; + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; + i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + if (before) { + new_ptr = mremap((void *)map, size, size, flags, + map + size + offsets[j]); + ASSERT_NE(new_ptr, MAP_FAILED); + buffer->ptr = new_ptr; + } + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; + i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + if (!before) { + new_ptr = mremap((void *)map, size, size, flags, + map + size + offsets[j]); + ASSERT_NE(new_ptr, MAP_FAILED); + buffer->ptr = new_ptr; + } + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; + i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + munmap(new_ptr, size); + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); + } + } + } + } +} + +/* + * Migrate private anonymous huge page with allocation errors. + */ +TEST_F(hmm, migrate_anon_huge_err) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(2 * size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, 2 * size); + + old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); + ASSERT_NE(old_ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)old_ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + buffer->ptr = map; + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device but force a THP allocation error. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, + HMM_DMIRROR_FLAG_FAIL_ALLOC); + ASSERT_EQ(ret, 0); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) { + ASSERT_EQ(ptr[i], i); + if (ptr[i] != i) + break; + } + + /* Try faulting back a single (PAGE_SIZE) page. */ + ptr = buffer->ptr; + ASSERT_EQ(ptr[2048], 2048); + + /* unmap and remap the region to reset things. */ + ret = munmap(old_ptr, 2 * size); + ASSERT_EQ(ret, 0); + old_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); + ASSERT_NE(old_ptr, MAP_FAILED); + map = (void *)ALIGN((uintptr_t)old_ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + buffer->ptr = map; + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate THP to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* + * Force an allocation error when faulting back a THP resident in the + * device. + */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, + HMM_DMIRROR_FLAG_FAIL_ALLOC); + ASSERT_EQ(ret, 0); + + ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ptr = buffer->ptr; + ASSERT_EQ(ptr[2048], 2048); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Migrate private anonymous huge zero page with allocation errors. + */ +TEST_F(hmm, migrate_anon_huge_zero_err) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = 2 * size; + buffer->mirror = malloc(2 * size); + ASSERT_NE(buffer->mirror, NULL); + memset(buffer->mirror, 0xFF, 2 * size); + + old_ptr = mmap(NULL, 2 * size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); + ASSERT_NE(old_ptr, MAP_FAILED); + + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)old_ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + buffer->ptr = map; + + /* Migrate memory to device but force a THP allocation error. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, + HMM_DMIRROR_FLAG_FAIL_ALLOC); + ASSERT_EQ(ret, 0); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + + /* Try faulting back a single (PAGE_SIZE) page. */ + ptr = buffer->ptr; + ASSERT_EQ(ptr[2048], 0); + + /* unmap and remap the region to reset things. */ + ret = munmap(old_ptr, 2 * size); + ASSERT_EQ(ret, 0); + old_ptr = mmap(NULL, 2 * size, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0); + ASSERT_NE(old_ptr, MAP_FAILED); + map = (void *)ALIGN((uintptr_t)old_ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + buffer->ptr = map; + + /* Initialize buffer in system memory (zero THP page). */ + ret = ptr[0]; + ASSERT_EQ(ret, 0); + + /* Migrate memory to device but force a THP allocation error. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_FLAGS, buffer, + HMM_DMIRROR_FLAG_FAIL_ALLOC); + ASSERT_EQ(ret, 0); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Fault the device memory back and check it. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +struct benchmark_results { + double sys_to_dev_time; + double dev_to_sys_time; + double throughput_s2d; + double throughput_d2s; +}; + +static double get_time_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0); +} + +static inline struct hmm_buffer *hmm_buffer_alloc(unsigned long size) +{ + struct hmm_buffer *buffer; + + buffer = malloc(sizeof(*buffer)); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + memset(buffer->mirror, 0xFF, size); + return buffer; +} + +static void print_benchmark_results(const char *test_name, size_t buffer_size, + struct benchmark_results *thp, + struct benchmark_results *regular) +{ + double s2d_improvement = ((regular->sys_to_dev_time - thp->sys_to_dev_time) / + regular->sys_to_dev_time) * 100.0; + double d2s_improvement = ((regular->dev_to_sys_time - thp->dev_to_sys_time) / + regular->dev_to_sys_time) * 100.0; + double throughput_s2d_improvement = ((thp->throughput_s2d - regular->throughput_s2d) / + regular->throughput_s2d) * 100.0; + double throughput_d2s_improvement = ((thp->throughput_d2s - regular->throughput_d2s) / + regular->throughput_d2s) * 100.0; + + printf("\n=== %s (%.1f MB) ===\n", test_name, buffer_size / (1024.0 * 1024.0)); + printf(" | With THP | Without THP | Improvement\n"); + printf("---------------------------------------------------------------------\n"); + printf("Sys->Dev Migration | %.3f ms | %.3f ms | %.1f%%\n", + thp->sys_to_dev_time, regular->sys_to_dev_time, s2d_improvement); + printf("Dev->Sys Migration | %.3f ms | %.3f ms | %.1f%%\n", + thp->dev_to_sys_time, regular->dev_to_sys_time, d2s_improvement); + printf("S->D Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n", + thp->throughput_s2d, regular->throughput_s2d, throughput_s2d_improvement); + printf("D->S Throughput | %.2f GB/s | %.2f GB/s | %.1f%%\n", + thp->throughput_d2s, regular->throughput_d2s, throughput_d2s_improvement); +} + +/* + * Run a single migration benchmark + * fd: file descriptor for hmm device + * use_thp: whether to use THP + * buffer_size: size of buffer to allocate + * iterations: number of iterations + * results: where to store results + */ +static inline int run_migration_benchmark(int fd, int use_thp, size_t buffer_size, + int iterations, struct benchmark_results *results) +{ + struct hmm_buffer *buffer; + unsigned long npages = buffer_size / sysconf(_SC_PAGESIZE); + double start, end; + double s2d_total = 0, d2s_total = 0; + int ret, i; + int *ptr; + + buffer = hmm_buffer_alloc(buffer_size); + + /* Map memory */ + buffer->ptr = mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (!buffer->ptr) + return -1; + + /* Apply THP hint if requested */ + if (use_thp) + ret = madvise(buffer->ptr, buffer_size, MADV_HUGEPAGE); + else + ret = madvise(buffer->ptr, buffer_size, MADV_NOHUGEPAGE); + + if (ret) + return ret; + + /* Initialize memory to make sure pages are allocated */ + ptr = (int *)buffer->ptr; + for (i = 0; i < buffer_size / sizeof(int); i++) + ptr[i] = i & 0xFF; + + /* Warmup iteration */ + ret = hmm_migrate_sys_to_dev(fd, buffer, npages); + if (ret) + return ret; + + ret = hmm_migrate_dev_to_sys(fd, buffer, npages); + if (ret) + return ret; + + /* Benchmark iterations */ + for (i = 0; i < iterations; i++) { + /* System to device migration */ + start = get_time_ms(); + + ret = hmm_migrate_sys_to_dev(fd, buffer, npages); + if (ret) + return ret; + + end = get_time_ms(); + s2d_total += (end - start); + + /* Device to system migration */ + start = get_time_ms(); + + ret = hmm_migrate_dev_to_sys(fd, buffer, npages); + if (ret) + return ret; + + end = get_time_ms(); + d2s_total += (end - start); + } + + /* Calculate average times and throughput */ + results->sys_to_dev_time = s2d_total / iterations; + results->dev_to_sys_time = d2s_total / iterations; + results->throughput_s2d = (buffer_size / (1024.0 * 1024.0 * 1024.0)) / + (results->sys_to_dev_time / 1000.0); + results->throughput_d2s = (buffer_size / (1024.0 * 1024.0 * 1024.0)) / + (results->dev_to_sys_time / 1000.0); + + /* Cleanup */ + hmm_buffer_free(buffer); + return 0; +} + +/* + * Benchmark THP migration with different buffer sizes + */ +TEST_F_TIMEOUT(hmm, benchmark_thp_migration, 120) +{ + struct benchmark_results thp_results, regular_results; + size_t thp_size = 2 * 1024 * 1024; /* 2MB - typical THP size */ + int iterations = 5; + + printf("\nHMM THP Migration Benchmark\n"); + printf("---------------------------\n"); + printf("System page size: %ld bytes\n", sysconf(_SC_PAGESIZE)); + + /* Test different buffer sizes */ + size_t test_sizes[] = { + thp_size / 4, /* 512KB - smaller than THP */ + thp_size / 2, /* 1MB - half THP */ + thp_size, /* 2MB - single THP */ + thp_size * 2, /* 4MB - two THPs */ + thp_size * 4, /* 8MB - four THPs */ + thp_size * 8, /* 16MB - eight THPs */ + thp_size * 128, /* 256MB - one twenty eight THPs */ + }; + + static const char *const test_names[] = { + "Small Buffer (512KB)", + "Half THP Size (1MB)", + "Single THP Size (2MB)", + "Two THP Size (4MB)", + "Four THP Size (8MB)", + "Eight THP Size (16MB)", + "One twenty eight THP Size (256MB)" + }; + + int num_tests = ARRAY_SIZE(test_sizes); + + /* Run all tests */ + for (int i = 0; i < num_tests; i++) { + /* Test with THP */ + ASSERT_EQ(run_migration_benchmark(self->fd, 1, test_sizes[i], + iterations, &thp_results), 0); + + /* Test without THP */ + ASSERT_EQ(run_migration_benchmark(self->fd, 0, test_sizes[i], + iterations, ®ular_results), 0); + + /* Print results */ + print_benchmark_results(test_names[i], test_sizes[i], + &thp_results, ®ular_results); + } +} +TEST_HARNESS_MAIN |
