summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c254
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h3
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c72
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h4
4 files changed, 320 insertions, 13 deletions
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 9215b26af10c..a7da4d9471e1 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -12,8 +12,13 @@
#include <unistd.h>
#include <time.h>
#include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
+#include <asm/barrier.h>
#include "test_util.h"
#include "kvm_util.h"
@@ -57,6 +62,8 @@
# define test_and_clear_bit_le test_and_clear_bit
#endif
+#define TEST_DIRTY_RING_COUNT 1024
+
/*
* Guest/Host shared variables. Ensure addr_gva2hva() and/or
* sync_global_to/from_guest() are used when accessing from
@@ -128,6 +135,25 @@ static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;
+/* Whether dirty ring reset is requested, or finished */
+static sem_t dirty_ring_vcpu_stop;
+static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is only used for verifying the dirty pages. Dirty ring has a very
+ * tricky case when the ring just got full, kvm will do userspace exit due to
+ * ring full. When that happens, the very last PFN is set but actually the
+ * data is not changed (the guest WRITE is not really applied yet), because
+ * we found that the dirty ring is full, refused to continue the vcpu, and
+ * recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit, because it's a redundant bit, and when the write happens later the bit
+ * will be set again. We use this variable to always keep track of the latest
+ * dirty gfn we've collected, so that if a mismatch of data found later in the
+ * verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
LOG_MODE_DIRTY_LOG = 0,
@@ -135,6 +161,9 @@ enum log_mode_t {
/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
LOG_MODE_CLEAR_LOG = 1,
+ /* Use dirty ring for logging */
+ LOG_MODE_DIRTY_RING = 2,
+
LOG_MODE_NUM,
/* Run all supported modes */
@@ -145,6 +174,20 @@ enum log_mode_t {
static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
/* Logging mode for current run */
static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+
+/*
+ * In our test we do signal tricks, let's use a better version of
+ * sem_wait to avoid signal interrupts
+ */
+static void sem_wait_until(sem_t *sem)
+{
+ int ret;
+
+ do
+ ret = sem_wait(sem);
+ while (ret == -1 && errno == EINTR);
+}
static bool clear_log_supported(void)
{
@@ -178,15 +221,131 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
}
-static void default_after_vcpu_run(struct kvm_vm *vm)
+static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
{
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+ "vcpu run failed: errno=%d", err);
+
TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s\n",
exit_reason_str(run->exit_reason));
}
+static bool dirty_ring_supported(void)
+{
+ return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+ /*
+ * Switch to dirty ring mode after VM creation but before any
+ * of the vcpu creation.
+ */
+ vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT *
+ sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+ return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+ gfn->flags = KVM_DIRTY_GFN_F_RESET;
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+ int slot, void *bitmap,
+ uint32_t num_pages, uint32_t *fetch_index)
+{
+ struct kvm_dirty_gfn *cur;
+ uint32_t count = 0;
+
+ while (true) {
+ cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT];
+ if (!dirty_gfn_is_dirtied(cur))
+ break;
+ TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+ "%u != %u", cur->slot, slot);
+ TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+ "0x%llx >= 0x%x", cur->offset, num_pages);
+ //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+ set_bit_le(cur->offset, bitmap);
+ dirty_ring_last_page = cur->offset;
+ dirty_gfn_set_collected(cur);
+ (*fetch_index)++;
+ count++;
+ }
+
+ return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+ sem_wait_until(&dirty_ring_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+ pr_info("Notifying vcpu to continue\n");
+ sem_post(&dirty_ring_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+ void *bitmap, uint32_t num_pages)
+{
+ /* We only have one vcpu */
+ static uint32_t fetch_index = 0;
+ uint32_t count = 0, cleared;
+
+ dirty_ring_wait_vcpu();
+
+ /* Only have one vcpu */
+ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+ slot, bitmap, num_pages, &fetch_index);
+
+ cleared = kvm_vm_reset_dirty_ring(vm);
+
+ /* Cleared pages should be the same as collected */
+ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
+ "with collected (%u)", cleared, count);
+
+ dirty_ring_continue_vcpu();
+
+ pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ /* A ucall-sync or ring-full event is allowed */
+ if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+ /* We should allow this to continue */
+ ;
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ /* Update the flag first before pause */
+ sem_post(&dirty_ring_vcpu_stop);
+ pr_info("vcpu stops because dirty ring is full...\n");
+ sem_wait_until(&dirty_ring_vcpu_cont);
+ pr_info("vcpu continues now.\n");
+ } else {
+ TEST_ASSERT(false, "Invalid guest sync status: "
+ "exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+}
+
+static void dirty_ring_before_vcpu_join(void)
+{
+ /* Kick another round of vcpu just to make sure it will quit */
+ sem_post(&dirty_ring_vcpu_cont);
+}
+
struct log_mode {
const char *name;
/* Return true if this mode is supported, otherwise false */
@@ -197,7 +356,8 @@ struct log_mode {
void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
void *bitmap, uint32_t num_pages);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vm *vm);
+ void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
+ void (*before_vcpu_join) (void);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -211,6 +371,14 @@ struct log_mode {
.collect_dirty_pages = clear_log_collect_dirty_pages,
.after_vcpu_run = default_after_vcpu_run,
},
+ {
+ .name = "dirty-ring",
+ .supported = dirty_ring_supported,
+ .create_vm_done = dirty_ring_create_vm_done,
+ .collect_dirty_pages = dirty_ring_collect_dirty_pages,
+ .before_vcpu_join = dirty_ring_before_vcpu_join,
+ .after_vcpu_run = dirty_ring_after_vcpu_run,
+ },
};
/*
@@ -260,12 +428,20 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
}
-static void log_mode_after_vcpu_run(struct kvm_vm *vm)
+static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vm);
+ mode->after_vcpu_run(vm, ret, err);
+}
+
+static void log_mode_before_vcpu_join(void)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->before_vcpu_join)
+ mode->before_vcpu_join();
}
static void generate_random_array(uint64_t *guest_array, uint64_t size)
@@ -278,20 +454,22 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
static void *vcpu_worker(void *data)
{
- int ret;
+ int ret, vcpu_fd;
struct kvm_vm *vm = data;
uint64_t *guest_array;
uint64_t pages_count = 0;
+ vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
+
guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
while (!READ_ONCE(host_quit)) {
+ /* Clear any existing kick signals */
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- log_mode_after_vcpu_run(vm);
+ ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+ log_mode_after_vcpu_run(vm, ret, errno);
}
pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -304,6 +482,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
uint64_t step = vm_num_host_pages(mode, 1);
uint64_t page;
uint64_t *value_ptr;
+ uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
value_ptr = host_test_mem + page * host_page_size;
@@ -318,14 +497,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
}
if (test_and_clear_bit_le(page, bmap)) {
+ bool matched;
+
host_dirty_count++;
+
/*
* If the bit is set, the value written onto
* the corresponding page should be either the
* previous iteration number or the current one.
*/
- TEST_ASSERT(*value_ptr == iteration ||
- *value_ptr == iteration - 1,
+ matched = (*value_ptr == iteration ||
+ *value_ptr == iteration - 1);
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+ if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+ /*
+ * Short answer: this case is special
+ * only for dirty ring test where the
+ * page is the last page before a kvm
+ * dirty ring full in iteration N-2.
+ *
+ * Long answer: Assuming ring size R,
+ * one possible condition is:
+ *
+ * main thr vcpu thr
+ * -------- --------
+ * iter=1
+ * write 1 to page 0~(R-1)
+ * full, vmexit
+ * collect 0~(R-1)
+ * kick vcpu
+ * write 1 to (R-1)~(2R-2)
+ * full, vmexit
+ * iter=2
+ * collect (R-1)~(2R-2)
+ * kick vcpu
+ * write 1 to (2R-2)
+ * (NOTE!!! "1" cached in cpu reg)
+ * write 2 to (2R-1)~(3R-3)
+ * full, vmexit
+ * iter=3
+ * collect (2R-2)~(3R-3)
+ * (here if we read value on page
+ * "2R-2" is 1, while iter=3!!!)
+ *
+ * This however can only happen once per iteration.
+ */
+ min_iter = iteration - 1;
+ continue;
+ } else if (page == dirty_ring_last_page) {
+ /*
+ * Please refer to comments in
+ * dirty_ring_last_page.
+ */
+ continue;
+ }
+ }
+
+ TEST_ASSERT(matched,
"Set page %"PRIu64" value %"PRIu64
" incorrect (iteration=%"PRIu64")",
page, *value_ptr, iteration);
@@ -390,7 +619,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
unsigned long interval, uint64_t phys_offset)
{
- pthread_t vcpu_thread;
struct kvm_vm *vm;
unsigned long *bmap;
@@ -488,6 +716,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
/* Tell the vcpu thread to quit */
host_quit = true;
+ log_mode_before_vcpu_join();
pthread_join(vcpu_thread, NULL);
pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
@@ -548,6 +777,9 @@ int main(int argc, char *argv[])
unsigned int mode;
int opt, i, j;
+ sem_init(&dirty_ring_vcpu_stop, 0, 0);
+ sem_init(&dirty_ring_vcpu_cont, 0, 0);
+
#ifdef __x86_64__
guest_mode_init(VM_MODE_PXXV48_4K, true, true);
#endif
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index b2c1364c0402..710009cc17fd 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -74,6 +74,7 @@ void kvm_vm_release(struct kvm_vm *vmp);
void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);
@@ -148,6 +149,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_guest_debug *debug);
@@ -201,6 +203,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_nested_state *state, bool ignore_error);
#endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
const char *exit_reason_str(unsigned int exit_reason);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index ef2114e1b7ad..a04302666b02 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -114,6 +114,16 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
return r;
}
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+{
+ struct kvm_enable_cap cap = { 0 };
+
+ cap.cap = KVM_CAP_DIRTY_LOG_RING;
+ cap.args[0] = ring_size;
+ vm_enable_cap(vm, &cap);
+ vm->dirty_ring_size = ring_size;
+}
+
static void vm_open(struct kvm_vm *vm, int perm)
{
vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -328,6 +338,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
__func__, strerror(-ret));
}
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+ return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
+}
+
/*
* Userspace Memory Region Find
*
@@ -432,10 +447,17 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
*
* Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
{
int ret;
+ if (vcpu->dirty_gfns) {
+ ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
+ TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
+ "rc: %i errno: %i", ret, errno);
+ vcpu->dirty_gfns = NULL;
+ }
+
ret = munmap(vcpu->state, sizeof(*vcpu->state));
TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
@@ -453,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp)
int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
- vm_vcpu_rm(vcpu);
+ vm_vcpu_rm(vmp, vcpu);
ret = close(vmp->fd);
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -1233,6 +1255,15 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
return rc;
}
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return vcpu->fd;
+}
+
void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1561,6 +1592,42 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
return ret;
}
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu;
+ uint32_t size = vm->dirty_ring_size;
+
+ TEST_ASSERT(size > 0, "Should enable dirty ring first");
+
+ vcpu = vcpu_find(vm, vcpuid);
+
+ TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
+
+ if (!vcpu->dirty_gfns) {
+ void *addr;
+
+ addr = mmap(NULL, size, PROT_READ,
+ MAP_PRIVATE, vcpu->fd,
+ vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
+
+ addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, vcpu->fd,
+ vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
+
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd,
+ vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+
+ vcpu->dirty_gfns = addr;
+ vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
+ }
+
+ return vcpu->dirty_gfns;
+}
+
/*
* VM Ioctl
*
@@ -1680,6 +1747,7 @@ static struct exit_reason {
{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
{KVM_EXIT_OSI, "OSI"},
{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+ {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index f07d383d03a1..34465dc562d8 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -28,6 +28,9 @@ struct vcpu {
uint32_t id;
int fd;
struct kvm_run *state;
+ struct kvm_dirty_gfn *dirty_gfns;
+ uint32_t fetch_index;
+ uint32_t dirty_gfns_count;
};
struct kvm_vm {
@@ -52,6 +55,7 @@ struct kvm_vm {
vm_vaddr_t tss;
vm_vaddr_t idt;
vm_vaddr_t handlers;
+ uint32_t dirty_ring_size;
};
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);