diff options
Diffstat (limited to 'tools/testing')
65 files changed, 1408 insertions, 1144 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 05d980fb083d..582e2e198fbf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -17,7 +17,7 @@ /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test -/x86_64/emulator_error_test +/x86_64/exit_on_emulation_failure_test /x86_64/fix_hypercall_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test @@ -36,6 +36,7 @@ /x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/sev_migrate_tests +/x86_64/smaller_maxphyaddr_emulation_test /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4a2caef2c939..f62dcfcda618 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -43,16 +43,17 @@ LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c -LIBKVM += lib/perf_test_util.c +LIBKVM += lib/memstress.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c LIBKVM_STRING += lib/string_override.c LIBKVM_x86_64 += lib/x86_64/apic.c LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/perf_test_util.c +LIBKVM_x86_64 += lib/x86_64/memstress.c LIBKVM_x86_64 += lib/x86_64/processor.c LIBKVM_x86_64 += lib/x86_64/svm.c LIBKVM_x86_64 += lib/x86_64/ucall.c @@ -81,7 +82,7 @@ TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test -TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test +TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid @@ -96,6 +97,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test 
TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 6f9c1f19c7f6..b1d2158c0b6d 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -158,12 +158,9 @@ int main(void) TEST_REQUIRE(vcpu_aarch64_only(vcpu)); - ucall_init(vm, NULL); - test_user_raz_wi(vcpu); test_user_raz_invariant(vcpu); test_guest_raz(vcpu); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 574eb73f0e90..f2a96779716a 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -375,7 +375,6 @@ static struct kvm_vm *test_vm_create(void) for (i = 0; i < nr_vcpus; i++) vcpu_init_descriptor_tables(vcpus[i]); - ucall_init(vm, NULL); test_init_timer_irq(vm); gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); @@ -414,36 +413,21 @@ static bool parse_args(int argc, char *argv[]) while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) { switch (opt) { case 'n': - test_args.nr_vcpus = atoi(optarg); - if (test_args.nr_vcpus <= 0) { - pr_info("Positive value needed for -n\n"); - goto err; - } else if (test_args.nr_vcpus > KVM_MAX_VCPUS) { + test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); + if (test_args.nr_vcpus > KVM_MAX_VCPUS) { pr_info("Max allowed vCPUs: %u\n", KVM_MAX_VCPUS); goto err; } break; case 'i': - test_args.nr_iter = atoi(optarg); - if 
(test_args.nr_iter <= 0) { - pr_info("Positive value needed for -i\n"); - goto err; - } + test_args.nr_iter = atoi_positive("Number of iterations", optarg); break; case 'p': - test_args.timer_period_ms = atoi(optarg); - if (test_args.timer_period_ms <= 0) { - pr_info("Positive value needed for -p\n"); - goto err; - } + test_args.timer_period_ms = atoi_positive("Periodicity", optarg); break; case 'm': - test_args.migration_freq_ms = atoi(optarg); - if (test_args.migration_freq_ms < 0) { - pr_info("0 or positive value needed for -m\n"); - goto err; - } + test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); break; case 'h': default: @@ -462,9 +446,6 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - if (!parse_args(argc, argv)) exit(KSFT_SKIP); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 947bd201435c..d86c4e4d1c82 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -241,7 +241,6 @@ static void guest_svc_handler(struct ex_regs *regs) enum single_step_op { SINGLE_STEP_ENABLE = 0, - SINGLE_STEP_DISABLE = 1, }; static void guest_code_ss(int test_cnt) @@ -258,7 +257,7 @@ static void guest_code_ss(int test_cnt) GUEST_SYNC(SINGLE_STEP_ENABLE); /* - * The userspace will veriry that the pc is as expected during + * The userspace will verify that the pc is as expected during * single step execution between iter_ss_begin and iter_ss_end. */ asm volatile("iter_ss_begin:nop\n"); @@ -268,11 +267,9 @@ static void guest_code_ss(int test_cnt) bvr = read_sysreg(dbgbvr0_el1); wvr = read_sysreg(dbgwvr0_el1); + /* Userspace disables Single Step when the end is nigh. 
*/ asm volatile("iter_ss_end:\n"); - /* Disable Single Step execution */ - GUEST_SYNC(SINGLE_STEP_DISABLE); - GUEST_ASSERT(bvr == w_bvr); GUEST_ASSERT(wvr == w_wvr); } @@ -295,7 +292,6 @@ static void test_guest_debug_exceptions(void) int stage; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -346,7 +342,6 @@ void test_single_step_from_userspace(int test_cnt) struct kvm_guest_debug debug = {}; vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss); - ucall_init(vm, NULL); run = vcpu->run; vcpu_args_set(vcpu, 1, test_cnt); @@ -364,15 +359,12 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(cmd == UCALL_SYNC, "Unexpected ucall cmd 0x%lx", cmd); - if (uc.args[1] == SINGLE_STEP_ENABLE) { - debug.control = KVM_GUESTDBG_ENABLE | - KVM_GUESTDBG_SINGLESTEP; - ss_enable = true; - } else { - debug.control = SINGLE_STEP_DISABLE; - ss_enable = false; - } + TEST_ASSERT(uc.args[1] == SINGLE_STEP_ENABLE, + "Unexpected ucall action 0x%lx", uc.args[1]); + debug.control = KVM_GUESTDBG_ENABLE | + KVM_GUESTDBG_SINGLESTEP; + ss_enable = true; vcpu_guest_debug_set(vcpu, &debug); continue; } @@ -385,6 +377,14 @@ void test_single_step_from_userspace(int test_cnt) "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); + if ((pc + 4) == (uint64_t)&iter_ss_end) { + test_pc = 0; + debug.control = KVM_GUESTDBG_ENABLE; + ss_enable = false; + vcpu_guest_debug_set(vcpu, &debug); + continue; + } + /* * If the current pc is between iter_ss_bgin and * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should @@ -423,7 +423,7 @@ int main(int argc, char *argv[]) while ((opt = getopt(argc, argv, "i:")) != -1) { switch (opt) { case 'i': - ss_iteration = atoi(optarg); + ss_iteration = atoi_positive("Number of iterations", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index a39da3fe4952..bef1499fb465 
100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -236,7 +236,6 @@ static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) vm = vm_create_with_one_vcpu(vcpu, guest_code); - ucall_init(vm, NULL); steal_time_init(*vcpu); return vm; @@ -306,8 +305,6 @@ static void test_run(void) int main(void) { - setbuf(stdout, NULL); - test_run(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index e0b9e81a3e09..cfa36f387948 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -79,7 +79,6 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, struct kvm_vm *vm; vm = vm_create(2); - ucall_init(vm, NULL); vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index 9c131d977a1b..eef816b80993 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -68,8 +68,6 @@ static void guest_code(void) /* we don't want to assert on run execution, hence that helper */ static int run_vcpu(struct kvm_vcpu *vcpu) { - ucall_init(vcpu->vm, NULL); - return __vcpu_run(vcpu) ? 
-errno : 0; } diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 17417220a083..90d854e0fcff 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -756,7 +756,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) print_args(&args); vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -818,22 +817,19 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': - nr_irqs = atoi(optarg); + nr_irqs = atoi_non_negative("Number of IRQs", optarg); if (nr_irqs > 1024 || nr_irqs % 32) help(argv[0]); break; case 'e': - eoi_split = (bool)atoi(optarg); + eoi_split = (bool)atoi_paranoid(optarg); default_args = false; break; case 'l': - level_sensitive = (bool)atoi(optarg); + level_sensitive = (bool)atoi_paranoid(optarg); default_args = false; break; case 'h': diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 76c583a07ea2..02d3587cab0a 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -44,7 +44,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" /* Global variable used to synchronize all of the vCPU threads. 
*/ @@ -126,7 +126,7 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) } static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct perf_test_vcpu_args *vcpu_args) + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t base_gva = vcpu_args->gva; @@ -148,7 +148,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t gva = base_gva + page * memstress_args.guest_page_size; uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { @@ -220,10 +220,10 @@ static bool spin_wait_for_next_iteration(int *current_iteration) return true; } -static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vm *vm = memstress_args.vm; int vcpu_idx = vcpu_args->vcpu_idx; int current_iteration = 0; @@ -279,7 +279,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti static void access_memory(struct kvm_vm *vm, int nr_vcpus, enum access_type access, const char *description) { - perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); + memstress_set_write_percent(vm, (access == ACCESS_READ) ? 
0 : 100); iteration_work = ITERATION_ACCESS_MEMORY; run_iteration(vm, nr_vcpus, description); } @@ -303,10 +303,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; int nr_vcpus = params->nr_vcpus; - vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, + vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); @@ -324,8 +324,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Set done to signal the vCPU threads to exit */ done = true; - perf_test_join_vcpu_threads(nr_vcpus); - perf_test_destroy_vm(vm); + memstress_join_vcpu_threads(nr_vcpus); + memstress_destroy_vm(vm); } static void help(char *name) @@ -368,7 +368,7 @@ int main(int argc, char *argv[]) params.vcpu_memory_bytes = parse_size(optarg); break; case 'v': - params.nr_vcpus = atoi(optarg); + params.nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'o': overlap_memory_access = true; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 779ae54f89c4..3a977ddf07b2 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -20,7 +20,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" #ifdef __NR_userfaultfd @@ -42,7 +42,7 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -285,7 +285,7 @@ 
static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; int r, i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); demand_paging_size = get_backing_src_pagesz(p->src_type); @@ -307,11 +307,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); for (i = 0; i < nr_vcpus; i++) { - struct perf_test_vcpu_args *vcpu_args; + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - vcpu_args = &perf_test_args.vcpu_args[i]; + vcpu_args = &memstress_args.vcpu_args[i]; /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); @@ -329,17 +329,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) pipefds[i * 2], p->uffd_mode, p->uffd_delay, &uffd_args[i], vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + vcpu_args->pages * memstress_args.guest_page_size); } } pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); ts_diff = timespec_elapsed(start); pr_info("All vCPU threads joined\n"); @@ -358,10 +358,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - perf_test_args.vcpu_args[0].pages * nr_vcpus / + memstress_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); free(guest_data_prototype); if (p->uffd_mode) { @@ -427,8 +427,8 @@ int main(int argc, 
char *argv[]) p.src_type = parse_backing_src_type(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'o': diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index f99e39a672d3..c33e89012ae6 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -16,7 +16,7 @@ #include "kvm_util.h" #include "test_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "guest_modes.h" #ifdef __aarch64__ @@ -67,7 +67,7 @@ static bool host_quit; static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; -static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; @@ -128,10 +128,12 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) struct test_params { unsigned long iterations; uint64_t phys_offset; - int wr_fract; bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; + uint32_t write_percent; + uint32_t random_seed; + bool random_access; }; static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) @@ -139,7 +141,7 @@ static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; int flags = enable ? 
KVM_MEM_LOG_DIRTY_PAGES : 0; vm_mem_region_set_flags(vm, slot, flags); @@ -161,7 +163,7 @@ static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); } @@ -173,7 +175,7 @@ static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int i; for (i = 0; i < slots; i++) { - int slot = PERF_TEST_MEM_SLOT_INDEX + i; + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); } @@ -221,11 +223,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec clear_dirty_log_total = (struct timespec){0}; int i; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, p->slots, p->backing_src, p->partition_vcpu_memory_access); - perf_test_set_wr_fract(vm, p->wr_fract); + pr_info("Random seed: %u\n", p->random_seed); + memstress_set_random_seed(vm, p->random_seed); + memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -248,7 +252,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) for (i = 0; i < nr_vcpus; i++) vcpu_last_completed_iteration[i] = -1; - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + /* + * Use 100% writes during the population phase to ensure all + * memory is actually populated and not just mapped to the zero + * page. This prevents expensive copy-on-write faults from + * occurring during the dirty memory iterations below, which + * would pollute the performance results. 
+ */ + memstress_set_write_percent(vm, 100); + memstress_set_random_access(vm, false); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); @@ -269,6 +282,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); + memstress_set_write_percent(vm, p->write_percent); + memstress_set_random_access(vm, p->random_access); + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs @@ -329,7 +345,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) * wait for them to exit. */ host_quit = true; - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", @@ -345,16 +361,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) free_bitmaps(bitmaps, p->slots); arch_cleanup_vm(vm); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " - "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" - "[-x memslots]\n", name); + printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] " + "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]" + "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name); puts(""); + printf(" -a: access memory randomly rather than in order.\n"); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" @@ -373,16 +390,29 @@ static void help(char *name) printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 
10M or 3G.\n" " (default: 1G)\n"); - printf(" -f: specify the fraction of pages which should be written to\n" - " as opposed to simply read, in the form\n" - " 1/<fraction of pages to write>.\n" - " (default: 1 i.e. all pages are written to.)\n"); printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -r: specify the starting random seed.\n"); backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" " (default: 1)\n"); + printf(" -w: specify the percentage of pages which should be written to\n" + " as an integer from 0-100 inclusive. This is probabalistic,\n" + " so -w X means each page has an X%% chance of writing\n" + " and a (100-X)%% chance of reading.\n" + " (default: 100 i.e. all pages are written to.)\n"); + printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" + " values (target pCPU), one for each vCPU, plus an optional\n" + " entry for the main application task (specified via entry\n" + " <nr_vcpus + 1>). If used, entries must be provided for all\n" + " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" + " E.g. 
to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" + " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n" + " To leave the application task unpinned, drop the final entry:\n\n" + " ./dirty_log_perf_test -v 3 -c 22,23,24\n\n" + " (default: no pinning)\n"); puts(""); exit(0); } @@ -390,12 +420,14 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + const char *pcpu_list = NULL; struct test_params p = { .iterations = TEST_HOST_LOOP_N, - .wr_fract = 1, .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, + .random_seed = 1, + .write_percent = 100, }; int opt; @@ -406,55 +438,73 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) { switch (opt) { + case 'a': + p.random_access = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'c': + pcpu_list = optarg; + break; case 'e': /* 'e' is for evil. 
*/ run_vcpus_while_disabling_dirty_logging = true; + break; case 'g': dirty_log_manual_caps = 0; break; - case 'i': - p.iterations = atoi(optarg); + case 'h': + help(argv[0]); break; - case 'p': - p.phys_offset = strtoull(optarg, NULL, 0); + case 'i': + p.iterations = atoi_positive("Number of iterations", optarg); break; case 'm': guest_modes_cmdline(optarg); break; case 'n': - perf_test_args.nested = true; - break; - case 'b': - guest_percpu_mem_size = parse_size(optarg); - break; - case 'f': - p.wr_fract = atoi(optarg); - TEST_ASSERT(p.wr_fract >= 1, - "Write fraction cannot be less than one"); - break; - case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, - "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + memstress_args.nested = true; break; case 'o': p.partition_vcpu_memory_access = false; break; + case 'p': + p.phys_offset = strtoull(optarg, NULL, 0); + break; + case 'r': + p.random_seed = atoi_positive("Random seed", optarg); + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; + case 'v': + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); + break; + case 'w': + p.write_percent = atoi_non_negative("Write percentage", optarg); + TEST_ASSERT(p.write_percent <= 100, + "Write percentage must be between 0 and 100"); + break; case 'x': - p.slots = atoi(optarg); + p.slots = atoi_positive("Number of slots", optarg); break; - case 'h': default: help(argv[0]); break; } } + if (pcpu_list) { + kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu, + nr_vcpus); + memstress_args.pin_vcpus = true; + } + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); pr_info("Test iterations: %"PRIu64"\n", p.iterations); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index b5234d6efbe1..a38c4369fb8e 100644 --- 
a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -756,8 +756,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Cache the HVA pointer of the region */ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); - ucall_init(vm, NULL); - /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); sync_global_to_guest(vm, guest_page_size); @@ -813,7 +811,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(host_bmap_track); - ucall_uninit(vm); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index e42a09cd24a0..228212ede05e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -16,6 +16,7 @@ #include <linux/kvm.h> #include "linux/rbtree.h" +#include <asm/atomic.h> #include <sys/ioctl.h> @@ -81,6 +82,7 @@ struct kvm_vm { struct sparsebit *vpages_mapped; bool has_irqchip; bool pgd_created; + vm_paddr_t ucall_mmio_addr; vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; @@ -688,6 +690,10 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + unsigned long vm_compute_max_gfn(struct kvm_vm *vm); unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); @@ -718,6 +724,19 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, memcpy(&(g), _p, sizeof(g)); \ }) +/* + * Write a global value, but only in the VM's (guest's) domain. 
Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. + */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, @@ -838,4 +857,13 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } +/* + * Arch hook that is invoked via a constructor, i.e. before executing main(), + * to allow for arch-specific setup that is common to all tests, e.g. computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + +void kvm_arch_vm_post_create(struct kvm_vm *vm); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h new file mode 100644 index 000000000000..bbd2a302df10 --- /dev/null +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/memstress.h + * + * Copyright (C) 2020, Google LLC. 
+ */ + +#ifndef SELFTEST_KVM_MEMSTRESS_H +#define SELFTEST_KVM_MEMSTRESS_H + +#include <pthread.h> + +#include "kvm_util.h" + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + +#define MEMSTRESS_MEM_SLOT_INDEX 1 + +struct memstress_vcpu_args { + uint64_t gpa; + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + struct kvm_vcpu *vcpu; + int vcpu_idx; +}; + +struct memstress_args { + struct kvm_vm *vm; + /* The starting address and size of the guest test region. */ + uint64_t gpa; + uint64_t size; + uint64_t guest_page_size; + uint32_t random_seed; + uint32_t write_percent; + + /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ + bool nested; + /* Randomize which pages are accessed by the guest. */ + bool random_access; + /* True if all vCPUs are pinned to pCPUs */ + bool pin_vcpus; + /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. 
*/ + uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; + + struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS]; +}; + +extern struct memstress_args memstress_args; + +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, + uint64_t vcpu_memory_bytes, int slots, + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access); +void memstress_destroy_vm(struct kvm_vm *vm); + +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); +void memstress_set_random_access(struct kvm_vm *vm, bool random_access); + +void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); +void memstress_join_vcpu_threads(int vcpus); +void memstress_guest_code(uint32_t vcpu_id); + +uint64_t memstress_nested_pages(int nr_vcpus); +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); + +#endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h deleted file mode 100644 index eaa88df0555a..000000000000 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * tools/testing/selftests/kvm/include/perf_test_util.h - * - * Copyright (C) 2020, Google LLC. 
- */ - -#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H -#define SELFTEST_KVM_PERF_TEST_UTIL_H - -#include <pthread.h> - -#include "kvm_util.h" - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - -#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ - -#define PERF_TEST_MEM_SLOT_INDEX 1 - -struct perf_test_vcpu_args { - uint64_t gpa; - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - struct kvm_vcpu *vcpu; - int vcpu_idx; -}; - -struct perf_test_args { - struct kvm_vm *vm; - /* The starting address and size of the guest test region. */ - uint64_t gpa; - uint64_t size; - uint64_t guest_page_size; - int wr_fract; - - /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ - bool nested; - - struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; -}; - -extern struct perf_test_args perf_test_args; - -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src, - bool partition_vcpu_memory_access); -void perf_test_destroy_vm(struct kvm_vm *vm); - -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); - -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); -void perf_test_join_vcpu_threads(int vcpus); -void perf_test_guest_code(uint32_t vcpu_id); - -uint64_t perf_test_nested_pages(int nr_vcpus); -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); - -#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index befc754ce9b3..80d6416f3012 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -77,6 +77,13 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_elapsed(struct timespec start); struct 
timespec timespec_div(struct timespec ts, int divisor); +struct guest_random_state { + uint32_t seed; +}; + +struct guest_random_state new_guest_random_state(uint32_t seed); +uint32_t guest_random_u32(struct guest_random_state *state); + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, @@ -152,4 +159,22 @@ static inline void *align_ptr_up(void *x, size_t size) return (void *)align_up((unsigned long)x, size); } +int atoi_paranoid(const char *num_str); + +static inline uint32_t atoi_positive(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str); + return num; +} + +static inline uint32_t atoi_non_negative(const char *name, const char *num_str) +{ + int num = atoi_paranoid(num_str); + + TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str); + return num; +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index ee79d180e07e..bdd373189a77 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -22,12 +22,18 @@ enum { struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + + /* Host virtual address of this struct. 
*/ + struct ucall *hva; }; -void ucall_init(struct kvm_vm *vm, void *arg); -void ucall_uninit(struct kvm_vm *vm); +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +void ucall_arch_do_ucall(vm_vaddr_t uc); +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); + void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 5da0c5e2a7af..f838ac5865dc 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -63,16 +63,21 @@ struct kvm_x86_cpu_feature { u8 reg; u8 bit; }; -#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ -({ \ - struct kvm_x86_cpu_feature feature = { \ - .function = fn, \ - .index = idx, \ - .reg = KVM_CPUID_##gpr, \ - .bit = __bit, \ - }; \ - \ - feature; \ +#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ +({ \ + struct kvm_x86_cpu_feature feature = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .bit = __bit, \ + }; \ + \ + static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \ + feature; \ }) /* @@ -89,6 +94,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) #define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) #define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6) #define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) #define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) #define 
X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) @@ -162,6 +168,102 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) #define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) +/* + * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit + * value/property as opposed to a single-bit feature. Again, pack the info + * into a 64-bit value to pass by value with no overhead. + */ +struct kvm_x86_cpu_property { + u32 function; + u8 index; + u8 reg; + u8 lo_bit; + u8 hi_bit; +}; +#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \ +({ \ + struct kvm_x86_cpu_property property = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .lo_bit = low_bit, \ + .hi_bit = high_bit, \ + }; \ + \ + static_assert(low_bit < high_bit); \ + static_assert((fn & 0xc0000000) == 0 || \ + (fn & 0xc0000000) == 0x40000000 || \ + (fn & 0xc0000000) == 0x80000000 || \ + (fn & 0xc0000000) == 0xc0000000); \ + static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \ + property; \ +}) + +#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) +#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) +#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) + +#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) +#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31) +#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31) +#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15) +#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31) +#define X86_PROPERTY_AMX_BYTES_PER_ROW 
KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15) +#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31) +#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15) + +#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31) + +#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) +#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) +#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) + +#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) + +/* + * Intel's architectural PMU events are bizarre. They have a "feature" bit + * that indicates the feature is _not_ supported, and a property that states + * the length of the bit mask of unsupported features. A feature is supported + * if the size of the bit mask is larger than the "unavailable" bit, and said + * bit is not set. + * + * Wrap the "unavailable" feature to simplify checking whether or not a given + * architectural event is supported. 
+ */ +struct kvm_x86_pmu_feature { + struct kvm_x86_cpu_feature anti_feature; +}; +#define KVM_X86_PMU_FEATURE(name, __bit) \ +({ \ + struct kvm_x86_pmu_feature feature = { \ + .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \ + }; \ + \ + feature; \ +}) + +#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5) + +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} + /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) #define PTE_WRITABLE_MASK BIT_ULL(1) @@ -172,12 +274,18 @@ struct kvm_x86_cpu_feature { #define PTE_GLOBAL_MASK BIT_ULL(8) #define PTE_NX_MASK BIT_ULL(63) +#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) + #define PAGE_SHIFT 12 #define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK) -#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) -#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) +#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) +#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x)) +#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK) + +#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK) +#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT) /* General Registers in 64-Bit Mode */ struct gpr64_regs { @@ -425,15 +533,74 @@ static inline void cpuid(uint32_t function, return __cpuid(function, 0, eax, ebx, ecx, edx); } -static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +static inline uint32_t this_cpu_fms(void) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(1, &eax, &ebx, &ecx, &edx); + return eax; +} + +static inline uint32_t this_cpu_family(void) +{ + return x86_family(this_cpu_fms()); +} + +static inline uint32_t 
this_cpu_model(void) +{ + return x86_model(this_cpu_fms()); +} + +static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { uint32_t gprs[4]; - __cpuid(feature.function, feature.index, + __cpuid(function, index, &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); - return gprs[feature.reg] & BIT(feature.bit); + return (gprs[reg] & GENMASK(hi, lo)) >> lo; +} + +static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +{ + return __this_cpu_has(feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +{ + return __this_cpu_has(property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); +} + +static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} + +static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !this_cpu_has(feature.anti_feature); } #define SET_XMM(__var, __xmm) \ @@ -526,23 +693,6 @@ static inline void cpu_relax(void) bool is_intel_cpu(void); bool is_amd_cpu(void); -static inline unsigned int x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - -static inline unsigned int x86_model(unsigned int eax) -{ - return ((eax >> 
12) & 0xf0) | ((eax >> 4) & 0x0f); -} - struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); void kvm_x86_state_cleanup(struct kvm_x86_state *state); @@ -604,10 +754,27 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); } +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +static inline uint32_t kvm_cpu_fms(void) +{ + return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; +} + +static inline uint32_t kvm_cpu_family(void) +{ + return x86_family(kvm_cpu_fms()); +} + +static inline uint32_t kvm_cpu_model(void) +{ + return x86_model(kvm_cpu_fms()); +} + bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, struct kvm_x86_cpu_feature feature); @@ -616,6 +783,42 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); } +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); + +static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +{ + return kvm_cpuid_property(kvm_get_supported_cpuid(), property); +} + +static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) +{ + uint32_t max_leaf; + + switch (property.function & 0xc0000000) { + case 0: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF); + break; + case 0x40000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF); + break; + case 0x80000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF); + break; + case 0xc0000000: + max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF); + } + return max_leaf >= property.function; +} 
+ +static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) +{ + uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + + return nr_bits > feature.anti_feature.bit && + !kvm_cpu_has(feature.anti_feature); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + @@ -639,8 +842,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) return cpuid; } -const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); @@ -701,17 +902,6 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); } -static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function, - uint32_t index) -{ - return get_cpuid_entry(kvm_get_supported_cpuid(), function, index); -} - -static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function) -{ - return __kvm_get_supported_cpuid_entry(function, 0); -} - uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); @@ -723,15 +913,6 @@ static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); } -static inline uint32_t kvm_get_cpuid_max_basic(void) -{ - return kvm_get_supported_cpuid_entry(0)->eax; -} - -static inline uint32_t kvm_get_cpuid_max_extended(void) -{ - return kvm_get_supported_cpuid_entry(0x80000000)->eax; -} void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); @@ -777,7 +958,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * for recursive faults when accessing memory in the handler. 
The downside to * using registers is that it restricts what registers can be used by the actual * instruction. But, selftests are 64-bit only, making register* pressure a - * minor concern. Use r9-r11 as they are volatile, i.e. don't need* to be saved + * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved * by the callee, and except for r11 are not implicit parameters to any * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V @@ -793,39 +974,52 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * * REGISTER OUTPUTS: * r9 = exception vector (non-zero) + * r10 = error code */ #define KVM_ASM_SAFE(insn) \ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ "1: " insn "\n\t" \ - "movb $0, %[vector]\n\t" \ - "jmp 3f\n\t" \ + "xor %%r9, %%r9\n\t" \ "2:\n\t" \ "mov %%r9b, %[vector]\n\t" \ - "3:\n\t" + "mov %%r10, %[error_code]\n\t" -#define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v) +#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) #define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" -#define kvm_asm_safe(insn, inputs...) \ -({ \ - uint8_t vector; \ - \ - asm volatile(KVM_ASM_SAFE(insn) \ - : KVM_ASM_SAFE_OUTPUTS(vector) \ - : inputs \ - : KVM_ASM_SAFE_CLOBBERS); \ - vector; \ +#define kvm_asm_safe(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define kvm_asm_safe_ec(insn, error_code, inputs...) 
\ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ }) static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val) { + uint64_t error_code; uint8_t vector; uint32_t a, d; asm volatile(KVM_ASM_SAFE("rdmsr") - : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector) + : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code) : "c"(msr) : KVM_ASM_SAFE_CLOBBERS); @@ -840,10 +1034,9 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) bool kvm_is_tdp_enabled(void); -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr); -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte); +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level); +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); @@ -895,4 +1088,27 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) #define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \ XSTATE_XTILE_DATA_MASK) + +#define PFERR_PRESENT_BIT 0 +#define PFERR_WRITE_BIT 1 +#define PFERR_USER_BIT 2 +#define PFERR_RSVD_BIT 3 +#define PFERR_FETCH_BIT 4 +#define PFERR_PK_BIT 5 +#define PFERR_SGX_BIT 15 +#define PFERR_GUEST_FINAL_BIT 32 +#define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 + +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK 
BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 71b290b6469d..e9c96b49966a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -572,7 +572,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); -bool kvm_vm_has_ept(struct kvm_vm *vm); +bool kvm_cpu_has_ept(void); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index f42c6ac6d71d..b3b00be1ef82 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -289,7 +289,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); /* Export shared structure test_args to guest */ - ucall_init(vm, NULL); sync_global_to_guest(vm, test_args); ret = sem_init(&test_stage_updated, 0, 0); @@ -417,7 +416,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) TEST_ASSERT(ret == 0, "Error in sem_destroy"); free(vcpu_threads); - ucall_uninit(vm); kvm_vm_free(vm); } @@ -461,8 +459,8 @@ int main(int argc, char *argv[]) p.test_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 's': diff --git 
a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6f5551368944..0de4aabc0c76 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -495,15 +495,6 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, close(kvm_fd); } -/* - * arm64 doesn't have a true default mode, so start by computing the - * available IPA space and page sizes early. - */ -void __attribute__((constructor)) init_guest_modes(void) -{ - guest_modes_append_default(); -} - void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res) @@ -528,3 +519,12 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"); } + +void kvm_selftest_arch_init(void) +{ + /* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ + guest_modes_append_default(); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index ed237b744690..562c16dfbb00 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -6,108 +6,36 @@ */ #include "kvm_util.h" +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. 
+ */ static vm_vaddr_t *ucall_exit_mmio_addr; -static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) -{ - if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) - return false; - - virt_pg_map(vm, gpa, gpa); - - ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; - sync_global_to_guest(vm, ucall_exit_mmio_addr); - - return true; -} - -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - vm_paddr_t gpa, start, end, step, offset; - unsigned int bits; - bool ret; + virt_pg_map(vm, mmio_gpa, mmio_gpa); - if (arg) { - gpa = (vm_paddr_t)arg; - ret = ucall_mmio_init(vm, gpa); - TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); - return; - } + vm->ucall_mmio_addr = mmio_gpa; - /* - * Find an address within the allowed physical and virtual address - * spaces, that does _not_ have a KVM memory region associated with - * it. Identity mapping an address like this allows the guest to - * access it, but as KVM doesn't know what to do with it, it - * will assume it's something userspace handles and exit with - * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. - * Here we start with a guess that the addresses around 5/8th - * of the allowed space are unmapped and then work both down and - * up from there in 1/16th allowed space sized steps. - * - * Note, we need to use VA-bits - 1 when calculating the allowed - * virtual address space for an identity mapping because the upper - * half of the virtual address space is the two's complement of the - * lower and won't match physical addresses. 
- */ - bits = vm->va_bits - 1; - bits = min(vm->pa_bits, bits); - end = 1ul << bits; - start = end * 5 / 8; - step = end / 16; - for (offset = 0; offset < end - start; offset += step) { - if (ucall_mmio_init(vm, start - offset)) - return; - if (ucall_mmio_init(vm, start + offset)) - return; - } - TEST_FAIL("Can't find a ucall mmio address"); + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa); } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { - ucall_exit_mmio_addr = 0; - sync_global_to_guest(vm, ucall_exit_mmio_addr); + WRITE_ONCE(*ucall_exit_mmio_addr, uc); } -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = {}; - va_list va; - int i; - - WRITE_ONCE(uc.cmd, cmd); - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); - va_end(va); - - WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); -} - -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_MMIO && - run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { - vm_vaddr_t gva; - - TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), "Unexpected ucall exit mmio address access"); - memcpy(&gva, run->mmio.data, sizeof(gva)); - memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)(*((uint64_t *)run->mmio.data)); } - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 9f54c098d9d0..d71a9a5974de 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ 
b/tools/testing/selftests/kvm/lib/elf.c @@ -138,7 +138,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) offset = hdr.e_phoff + (n1 * hdr.e_phentsize); offset_rv = lseek(fd, offset, SEEK_SET); TEST_ASSERT(offset_rv == offset, - "Failed to seek to begining of program header %u,\n" + "Failed to seek to beginning of program header %u,\n" " filename: %s\n" " rv: %jd errno: %i", n1, filename, (intmax_t) offset_rv, errno); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..5ac8f207ed92 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -11,6 +11,7 @@ #include "processor.h" #include <assert.h> +#include <sched.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -334,15 +335,24 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, { uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, nr_extra_pages); + struct userspace_mem_region *slot0; struct kvm_vm *vm; vm = ____vm_create(mode, nr_pages); kvm_vm_elf_load(vm, program_invocation_name); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif + /* + * TODO: Add proper defines to protect the library's memslots, and then + * carve out memslot1 for the ucall MMIO address. KVM treats writes to + * read-only memslots as MMIO, and creating a read-only memslot for the + * MMIO region would prevent silently clobbering the MMIO region. 
+ */ + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + + kvm_arch_vm_post_create(vm); + return vm; } @@ -443,6 +453,59 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } +void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +{ + cpu_set_t mask; + int r; + + CPU_ZERO(&mask); + CPU_SET(pcpu, &mask); + r = sched_setaffinity(0, sizeof(mask), &mask); + TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); +} + +static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +{ + uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + + TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), + "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + return pcpu; +} + +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus) +{ + cpu_set_t allowed_mask; + char *cpu, *cpu_list; + char delim[2] = ","; + int i, r; + + cpu_list = strdup(pcpus_string); + TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + + r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); + TEST_ASSERT(!r, "sched_getaffinity() failed"); + + cpu = strtok(cpu_list, delim); + + /* 1. Get all pcpus for vcpus. */ + for (i = 0; i < nr_vcpus; i++) { + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); + cpu = strtok(NULL, delim); + } + + /* 2. Check if the main worker needs to be pinned. 
*/ + if (cpu) { + kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + cpu = strtok(NULL, delim); + } + + TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu); + free(cpu_list); +} + /* * Userspace Memory Region Find * @@ -2021,3 +2084,19 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, break; } } + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ +} + +__weak void kvm_selftest_arch_init(void) +{ +} + +void __attribute((constructor)) kvm_selftest_init(void) +{ + /* Tell stdout not to buffer its content. */ + setbuf(stdout, NULL); + + kvm_selftest_arch_init(); +} diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/memstress.c index 9618b37c66f7..2de8a5d527b3 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -2,13 +2,15 @@ /* * Copyright (C) 2020, Google LLC. */ +#define _GNU_SOURCE + #include <inttypes.h> #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" -struct perf_test_args perf_test_args; +struct memstress_args memstress_args; /* * Guest virtual memory offset of the testing memory slot. @@ -31,7 +33,7 @@ struct vcpu_thread { static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; /* The function run by each vCPU thread, as provided by the test. */ -static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); +static void (*vcpu_thread_fn)(struct memstress_vcpu_args *); /* Set to true once all vCPU threads are up and running. */ static bool all_vcpu_threads_running; @@ -42,14 +44,19 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. 
*/ -void perf_test_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(uint32_t vcpu_idx) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; + struct guest_random_state rand_state; uint64_t gva; uint64_t pages; + uint64_t addr; + uint64_t page; int i; + rand_state = new_guest_random_state(args->random_seed + vcpu_idx); + gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -58,9 +65,14 @@ void perf_test_guest_code(uint32_t vcpu_idx) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * pta->guest_page_size); + if (args->random_access) + page = guest_random_u32(&rand_state) % pages; + else + page = i; - if (i % pta->wr_fract == 0) + addr = gva + (page * args->guest_page_size); + + if (guest_random_u32(&rand_state) % 100 < args->write_percent) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -70,17 +82,17 @@ void perf_test_guest_code(uint32_t vcpu_idx) } } -void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, +void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], uint64_t vcpu_memory_bytes, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args; + struct memstress_args *args = &memstress_args; + struct memstress_vcpu_args *vcpu_args; int i; for (i = 0; i < nr_vcpus; i++) { - vcpu_args = &pta->vcpu_args[i]; + vcpu_args = &args->vcpu_args[i]; vcpu_args->vcpu = vcpus[i]; vcpu_args->vcpu_idx = i; @@ -89,29 +101,29 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, vcpu_args->gva = guest_test_virt_mem + (i * vcpu_memory_bytes); vcpu_args->pages = vcpu_memory_bytes / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes); + args->guest_page_size; + vcpu_args->gpa = args->gpa + (i * 
vcpu_memory_bytes); } else { vcpu_args->gva = guest_test_virt_mem; vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) / - pta->guest_page_size; - vcpu_args->gpa = pta->gpa; + args->guest_page_size; + vcpu_args->gpa = args->gpa; } vcpu_args_set(vcpus[i], 1, i); pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", i, vcpu_args->gpa, vcpu_args->gpa + - (vcpu_args->pages * pta->guest_page_size)); + (vcpu_args->pages * args->guest_page_size)); } } -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, +struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { - struct perf_test_args *pta = &perf_test_args; + struct memstress_args *args = &memstress_args; struct kvm_vm *vm; uint64_t guest_num_pages, slot0_pages = 0; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); @@ -121,20 +133,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); /* By default vCPUs will write to memory. */ - pta->wr_fract = 1; + args->write_percent = 100; /* * Snapshot the non-huge page size. This is used by the guest code to * access/dirty pages at the logging granularity. 
*/ - pta->guest_page_size = vm_guest_mode_params[mode].page_size; + args->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size); + (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size); TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", @@ -144,8 +156,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * If using nested, allocate extra pages for the nested page tables and * in-memory data structures. */ - if (pta->nested) - slot0_pages += perf_test_nested_pages(nr_vcpus); + if (args->nested) + slot0_pages += memstress_nested_pages(nr_vcpus); /* * Pass guest_num_pages to populate the page tables for test memory. @@ -153,9 +165,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * effect as KVM allows aliasing HVAs in meslots. */ vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, - perf_test_guest_code, vcpus); + memstress_guest_code, vcpus); - pta->vm = vm; + args->vm = vm; /* Put the test region at the top guest physical memory. */ region_end_gfn = vm->max_gfn + 1; @@ -165,8 +177,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, * When running vCPUs in L2, restrict the test region to 48 bits to * avoid needing 5-level page tables to identity map L2. 
*/ - if (pta->nested) - region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size); + if (args->nested) + region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size); #endif /* * If there should be more memory in the guest test region than there @@ -178,63 +190,72 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, " nr_vcpus: %d wss: %" PRIx64 "]\n", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); - pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size; - pta->gpa = align_down(pta->gpa, backing_src_pagesz); + args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; + args->gpa = align_down(args->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - pta->gpa = align_down(pta->gpa, 1 << 20); + args->gpa = align_down(args->gpa, 1 << 20); #endif - pta->size = guest_num_pages * pta->guest_page_size; + args->size = guest_num_pages * args->guest_page_size; pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", - pta->gpa, pta->gpa + pta->size); + args->gpa, args->gpa + args->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; + vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, - PERF_TEST_MEM_SLOT_INDEX + i, + MEMSTRESS_MEM_SLOT_INDEX + i, region_pages, 0); } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages); - perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, + memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); - if (pta->nested) { + if (args->nested) { pr_info("Configuring vCPUs to run in L2 (nested).\n"); - 
perf_test_setup_nested(vm, nr_vcpus, vcpus); + memstress_setup_nested(vm, nr_vcpus, vcpus); } - ucall_init(vm, NULL); - /* Export the shared variables to the guest. */ - sync_global_to_guest(vm, perf_test_args); + sync_global_to_guest(vm, memstress_args); return vm; } -void perf_test_destroy_vm(struct kvm_vm *vm) +void memstress_destroy_vm(struct kvm_vm *vm) { - ucall_uninit(vm); kvm_vm_free(vm); } -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) +{ + memstress_args.write_percent = write_percent; + sync_global_to_guest(vm, memstress_args.write_percent); +} + +void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) +{ + memstress_args.random_seed = random_seed; + sync_global_to_guest(vm, memstress_args.random_seed); +} + +void memstress_set_random_access(struct kvm_vm *vm, bool random_access) { - perf_test_args.wr_fract = wr_fract; - sync_global_to_guest(vm, perf_test_args); + memstress_args.random_access = random_access; + sync_global_to_guest(vm, memstress_args.random_access); } -uint64_t __weak perf_test_nested_pages(int nr_vcpus) +uint64_t __weak memstress_nested_pages(int nr_vcpus) { return 0; } -void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) +void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) { pr_info("%s() not support on this architecture, skipping.\n", __func__); exit(KSFT_SKIP); @@ -243,6 +264,10 @@ void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_v static void *vcpu_thread_main(void *data) { struct vcpu_thread *vcpu = data; + int vcpu_idx = vcpu->vcpu_idx; + + if (memstress_args.pin_vcpus) + kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -255,13 +280,13 @@ static void *vcpu_thread_main(void *data) while (!READ_ONCE(all_vcpu_threads_running)) ; - 
vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]); + vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]); return NULL; } -void perf_test_start_vcpu_threads(int nr_vcpus, - void (*vcpu_fn)(struct perf_test_vcpu_args *)) +void memstress_start_vcpu_threads(int nr_vcpus, + void (*vcpu_fn)(struct memstress_vcpu_args *)) { int i; @@ -285,7 +310,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus, WRITE_ONCE(all_vcpu_threads_running, true); } -void perf_test_join_vcpu_threads(int nr_vcpus) +void memstress_join_vcpu_threads(int nr_vcpus) { int i; diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 087b9740bc8f..9a3476a2dfca 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,11 +10,7 @@ #include "kvm_util.h" #include "processor.h" -void ucall_init(struct kvm_vm *vm, void *arg) -{ -} - -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } @@ -44,47 +40,22 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, return ret; } -void ucall(uint64_t cmd, int nargs, ...) 
+void ucall_arch_do_ucall(vm_vaddr_t uc) { - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, KVM_RISCV_SELFTESTS_SBI_UCALL, - (vm_vaddr_t)&uc, 0, 0, 0, 0, 0); + uc, 0, 0, 0, 0, 0); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_RISCV_SBI && run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { switch (run->riscv_sbi.function_id) { case KVM_RISCV_SELFTESTS_SBI_UCALL: - memcpy(&ucall, - addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); - - break; + return (void *)run->riscv_sbi.args[0]; case KVM_RISCV_SELFTESTS_SBI_UNEXP: vcpu_dump(stderr, vcpu, 2); TEST_ASSERT(0, "Unexpected trap taken by guest"); @@ -93,6 +64,5 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) break; } } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 73dc4e21190f..a7f02dc372cf 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,40 +6,19 @@ */ #include "kvm_util.h" -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { -} - -void ucall(uint64_t cmd, int nargs, ...) 
-{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory"); + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && @@ -47,13 +26,7 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) (run->s390_sieic.ipb >> 16) == 0x501) { int reg = run->s390_sieic.ipa & 0xf; - memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)run->s.regs.gprs[reg]; } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 6d23878bbfe1..5c22fa4c2825 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,23 @@ #include "test_util.h" /* + * Random number generator that is usable from guest code. This is the + * Park-Miller LCG using standard constants. + */ + +struct guest_random_state new_guest_random_state(uint32_t seed) +{ + struct guest_random_state s = {.seed = seed}; + return s; +} + +uint32_t guest_random_u32(struct guest_random_state *state) +{ + state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + return state->seed; +} + +/* * Parses "[0-9]+[kmgt]?". 
*/ size_t parse_size(const char *size) @@ -334,3 +351,22 @@ long get_run_delay(void) return val[1]; } + +int atoi_paranoid(const char *num_str) +{ + char *end_ptr; + long num; + + errno = 0; + num = strtol(num_str, &end_ptr, 0); + TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str); + TEST_ASSERT(num_str != end_ptr, + "strtol(\"%s\") didn't find a valid integer.", num_str); + TEST_ASSERT(*end_ptr == '\0', + "strtol(\"%s\") failed to parse trailing characters \"%s\".", + num_str, end_ptr); + TEST_ASSERT(num >= INT_MIN && num <= INT_MAX, + "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX); + + return num; +} diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c new file mode 100644 index 000000000000..fcae96461e46 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "kvm_util.h" +#include "linux/types.h" +#include "linux/bitmap.h" +#include "linux/atomic.h" + +struct ucall_header { + DECLARE_BITMAP(in_use, KVM_MAX_VCPUS); + struct ucall ucalls[KVM_MAX_VCPUS]; +}; + +/* + * ucall_pool holds per-VM values (global data is duplicated by each VM), it + * must not be accessed from host code. 
+ */ +static struct ucall_header *ucall_pool; + +void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + struct ucall_header *hdr; + struct ucall *uc; + vm_vaddr_t vaddr; + int i; + + vaddr = vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR); + hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + memset(hdr, 0, sizeof(*hdr)); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + uc = &hdr->ucalls[i]; + uc->hva = uc; + } + + write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + + ucall_arch_init(vm, mmio_gpa); +} + +static struct ucall *ucall_alloc(void) +{ + struct ucall *uc; + int i; + + GUEST_ASSERT(ucall_pool); + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) { + uc = &ucall_pool->ucalls[i]; + memset(uc->args, 0, sizeof(uc->args)); + return uc; + } + } + + GUEST_ASSERT(0); + return NULL; +} + +static void ucall_free(struct ucall *uc) +{ + /* Beware, here be pointer arithmetic. */ + clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); +} + +void ucall(uint64_t cmd, int nargs, ...) 
+{ + struct ucall *uc; + va_list va; + int i; + + uc = ucall_alloc(); + + WRITE_ONCE(uc->cmd, cmd); + + nargs = min(nargs, UCALL_MAX_ARGS); + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + struct ucall ucall; + void *addr; + + if (!uc) + uc = &ucall; + + addr = ucall_arch_get_ucall(vcpu); + if (addr) { + memcpy(uc, addr, sizeof(*uc)); + vcpu_run_complete_io(vcpu); + } else { + memset(uc, 0, sizeof(*uc)); + } + + return uc->cmd; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c index 0f344a7c89c4..d61e623afc8c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to perf_test_util.c. + * x86_64-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. 
*/ @@ -11,25 +11,25 @@ #include "test_util.h" #include "kvm_util.h" -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "vmx.h" -void perf_test_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(uint64_t vcpu_id) { - perf_test_guest_code(vcpu_id); + memstress_guest_code(vcpu_id); vmcall(); } -extern char perf_test_l2_guest_entry[]; +extern char memstress_l2_guest_entry[]; __asm__( -"perf_test_l2_guest_entry:" +"memstress_l2_guest_entry:" " mov (%rsp), %rdi;" -" call perf_test_l2_guest_code;" +" call memstress_l2_guest_code;" " ud2;" ); -static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) +static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -42,14 +42,14 @@ static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; *rsp = vcpu_id; - prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp); + prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_DONE(); } -uint64_t perf_test_nested_pages(int nr_vcpus) +uint64_t memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -59,7 +59,7 @@ uint64_t perf_test_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; @@ -72,12 +72,12 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) */ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); - start = align_down(perf_test_args.gpa, PG_SIZE_1G); - end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G); + start = align_down(memstress_args.gpa, PG_SIZE_1G); + end = align_up(memstress_args.gpa + memstress_args.size, 
PG_SIZE_1G); nested_identity_map_1g(vmx, vm, start, end - start); } -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) +void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; @@ -85,12 +85,13 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc int vcpu_id; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vmx = vcpu_alloc_vmx(vm, &vmx_gva); if (vcpu_id == 0) { - perf_test_setup_ept(vmx, vm); + memstress_setup_ept(vmx, vm); vmx0 = vmx; } else { /* Share the same EPT table across all vCPUs. */ @@ -100,11 +101,11 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc } /* - * Override the vCPU to run perf_test_l1_guest_code() which will - * bounce it into L2 before calling perf_test_guest_code(). + * Override the vCPU to run memstress_l1_guest_code() which will + * bounce it into L2 before calling memstress_guest_code(). 
*/ vcpu_regs_get(vcpus[vcpu_id], &regs); - regs.rip = (unsigned long) perf_test_l1_guest_code; + regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], &regs); vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 41c1c73c464d..d532c20c74fd 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -131,23 +131,28 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) } } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, - int level) +static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, + uint64_t vaddr, int level) { - uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); + uint64_t pt_gpa = PTE_GET_PA(*parent_pte); + uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", + level + 1, vaddr); + return &page_table[index]; } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t pt_pfn, + uint64_t *parent_pte, uint64_t vaddr, uint64_t paddr, int current_level, int target_level) { - uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; @@ -197,21 +202,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created.
*/ - pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, - vaddr, paddr, PG_LEVEL_512G, level); + pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); + pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); + pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. */ - pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); @@ -241,30 +245,25 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, - struct kvm_vcpu *vcpu, - uint64_t vaddr) +static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - struct kvm_sregs sregs; - uint64_t rsvd_mask = 0; + if (*pte & PTE_LARGE_MASK) { + TEST_ASSERT(*level == PG_LEVEL_NONE || + *level == current_level, + "Unexpected hugepage at level %d\n", current_level); + *level = current_level; + } - /* Set the high bits in the reserved mask. */ - if (vm->pa_bits < 52) - rsvd_mask = GENMASK_ULL(51, vm->pa_bits); + return *level == current_level; +} - /* - * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries - * with 4-Level Paging and 5-Level Paging". - * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, - * the XD flag (bit 63) is reserved. 
- */ - vcpu_sregs_get(vcpu, &sregs); - if ((sregs.efer & EFER_NX) == 0) { - rsvd_mask |= PTE_NX_MASK; - } +uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, + int *level) +{ + uint64_t *pml4e, *pdpe, *pde; + + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + "Invalid PG_LEVEL_* '%d'", *level); TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -279,54 +278,26 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), "Canonical check failed. The virtual address is invalid."); - index[0] = (vaddr >> 12) & 0x1ffu; - index[1] = (vaddr >> 21) & 0x1ffu; - index[2] = (vaddr >> 30) & 0x1ffu; - index[3] = (vaddr >> 39) & 0x1ffu; + pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); + if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) + return pml4e; - pml4e = addr_gpa2hva(vm, vm->pgd); - TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK, - "Expected pml4e to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0, - "Unexpected reserved bits set."); + pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); + if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) + return pdpe; - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK, - "Expected pdpe to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK), - "Expected pdpe to map a pde not a 1-GByte page."); - TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); + pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); + if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) + return pde; - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK, - "Expected pde to be present for gva: 0x%08lx", vaddr); - TEST_ASSERT(!(pde[index[1]] & 
PTE_LARGE_MASK), - "Expected pde to map a pte not a 2-MByte page."); - TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0, - "Unexpected reserved bits set."); - - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK, - "Expected pte to be present for gva: 0x%08lx", vaddr); - - return &pte[index[0]]; -} - -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr) -{ - uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr); - - return *(uint64_t *)pte; + return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); } -void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, - uint64_t vaddr, uint64_t pte) +uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) { - uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr); + int level = PG_LEVEL_4K; - *(uint64_t *)new_pte = pte; + return __vm_get_page_table_entry(vm, vaddr, &level); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) @@ -512,41 +483,17 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { - uint16_t index[4]; - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - index[0] = (gva >> 12) & 0x1ffu; - index[1] = (gva >> 21) & 0x1ffu; - index[2] = (gva >> 30) & 0x1ffu; - index[3] = (gva >> 39) & 0x1ffu; + int level = PG_LEVEL_NONE; + uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); - if (!vm->pgd_created) - goto unmapped_gva; - pml4e = addr_gpa2hva(vm, vm->pgd); - if (!(pml4e[index[3]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size); - if (!(pdpe[index[2]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size); - if 
(!(pde[index[1]] & PTE_PRESENT_MASK)) - goto unmapped_gva; - - pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size); - if (!(pte[index[0]] & PTE_PRESENT_MASK)) - goto unmapped_gva; + TEST_ASSERT(*pte & PTE_PRESENT_MASK, + "Leaf PTE not PRESENT for gva: 0x%08lx", gva); - return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); - -unmapped_gva: - TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(EXIT_FAILURE); + /* + * No need for a hugepage mask on the PTE, x86-64 requires the "unused" + * address bits to be zero. + */ + return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level)); } static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) @@ -639,6 +586,11 @@ void __vm_xsave_require_permission(int bit, const char *name) bitmask); } +void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ + vm_create_irqchip(vm); +} + struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code) { @@ -700,8 +652,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return cpuid; } -bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_feature feature) +static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index, + uint8_t reg, uint8_t lo, uint8_t hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -714,12 +667,25 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, * order, but kvm_x86_cpu_feature matches that mess, so yay * pointer shenanigans! 
*/ - if (entry->function == feature.function && - entry->index == feature.index) - return (&entry->eax)[feature.reg] & BIT(feature.bit); + if (entry->function == function && entry->index == index) + return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo; } - return false; + return 0; +} + +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature) +{ + return __kvm_cpu_has(cpuid, feature.function, feature.index, + feature.reg, feature.bit, feature.bit); +} + +uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) +{ + return __kvm_cpu_has(cpuid, property.function, property.index, + property.reg, property.lo_bit, property.hi_bit); } uint64_t kvm_get_feature_msr(uint64_t msr_index) @@ -1059,18 +1025,12 @@ bool is_amd_cpu(void) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { - const struct kvm_cpuid_entry2 *entry; - bool pae; - - /* SDM 4.1.4 */ - if (kvm_get_cpuid_max_extended() < 0x80000008) { - pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); - *pa_bits = pae ? 36 : 32; + if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { + *pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 
36 : 32; *va_bits = 32; } else { - entry = kvm_get_supported_cpuid_entry(0x80000008); - *pa_bits = entry->eax & 0xff; - *va_bits = (entry->eax >> 8) & 0xff; + *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR); } } @@ -1103,6 +1063,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) regs->rip = regs->r11; regs->r9 = regs->vector; + regs->r10 = regs->error_code; return true; } @@ -1265,7 +1226,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint32_t eax, ebx, ecx, edx, max_ext_leaf; + uint8_t maxphyaddr; max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; @@ -1279,8 +1240,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; - cpuid(1, &eax, &ebx, &ecx, &edx); - if (x86_family(eax) < 0x17) + if (this_cpu_family() < 0x17) goto done; /* @@ -1288,17 +1248,14 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. 
*/ - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - max_ext_leaf = eax; - if (max_ext_leaf < 0x80000008) + if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) goto done; - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; - if (max_ext_leaf >= 0x8000001f) { - cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); - max_pfn >>= (ebx >> 6) & 0x3f; - } + maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); + max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1; + + if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION)) + max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION); ht_gfn = max_pfn - num_ht_pages; done: diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index e5f0f9e0d3ee..4d41dc63cc9e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,52 +8,25 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_init(struct kvm_vm *vm, void *arg) +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { } -void ucall_uninit(struct kvm_vm *vm) +void ucall_arch_do_ucall(vm_vaddr_t uc) { -} - -void ucall(uint64_t cmd, int nargs, ...) 
-{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = min(nargs, UCALL_MAX_ARGS); - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - struct ucall ucall = {}; - - if (uc) - memset(uc, 0, sizeof(*uc)); if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; vcpu_regs_get(vcpu, &regs); - memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi), - sizeof(ucall)); - - vcpu_run_complete_io(vcpu); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); + return (void *)regs.rdi; } - - return ucall.cmd; + return NULL; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index d21049c38fc5..3e4ea846366c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -544,26 +544,22 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); } -bool kvm_vm_has_ept(struct kvm_vm *vm) +bool kvm_cpu_has_ept(void) { - struct kvm_vcpu *vcpu; uint64_t ctrl; - vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list); - TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n"); - - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) return false; - ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32; return ctrl & SECONDARY_EXEC_ENABLE_EPT; } void prepare_eptp(struct vmx_pages
*vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { - TEST_REQUIRE(kvm_vm_has_ept(vm)); + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); vmx->eptp = (void *)vm_vaddr_alloc_page(vm); vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 9a6e4f3ad6b5..feaf2be20ff2 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -11,6 +11,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/atomic.h> +#include <linux/sizes.h> #include "kvm_util.h" #include "test_util.h" @@ -162,8 +163,7 @@ int main(int argc, char *argv[]) * just below the 4gb boundary. This test could create memory at * 1gb-3gb,but it's simpler to skip straight to 4gb. */ - const uint64_t size_1gb = (1 << 30); - const uint64_t start_gpa = (4ull * size_1gb); + const uint64_t start_gpa = SZ_4G; const int first_slot = 1; struct timespec time_start, time_run1, time_reset, time_run2; @@ -180,29 +180,26 @@ int main(int argc, char *argv[]) * are quite common for x86, requires changing only max_mem (KVM allows * 32k memslots, 32k * 2gb == ~64tb of guest memory). */ - slot_size = 2 * size_1gb; + slot_size = SZ_2G; max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_slots > first_slot, "KVM is broken"); /* All KVM MMUs should be able to survive a 128gb guest. 
*/ - max_mem = 128 * size_1gb; + max_mem = 128ull * SZ_1G; calc_default_nr_vcpus(); while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) { switch (opt) { case 'c': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0"); + nr_vcpus = atoi_positive("Number of vCPUs", optarg); break; case 'm': - max_mem = atoi(optarg) * size_1gb; - TEST_ASSERT(max_mem > 0, "memory size must be >0"); + max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G; break; case 's': - slot_size = atoi(optarg) * size_1gb; - TEST_ASSERT(slot_size > 0, "slot size must be >0"); + slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G; break; case 'H': hugepages = true; @@ -245,7 +242,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += size_1gb) + for (i = 0; i < slot_size; i += SZ_1G) __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); #else for (i = 0; i < slot_size; i += vm->page_size) @@ -260,7 +257,7 @@ int main(int argc, char *argv[]) vcpus = NULL; pr_info("Running with %lugb of guest memory and %u vCPUs\n", - (gpa - start_gpa) / size_1gb, nr_vcpus); + (gpa - start_gpa) / SZ_1G, nr_vcpus); rendezvous_with_vcpus(&time_start, "spawning"); rendezvous_with_vcpus(&time_run1, "run 1"); diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index bb1d17a1171b..d07e921bfcc5 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -21,7 +21,7 @@ #include <linux/bitops.h> #include <linux/userfaultfd.h> -#include "perf_test_util.h" +#include "memstress.h" #include "processor.h" #include "test_util.h" #include "guest_modes.h" @@ -36,7 +36,7 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus = true; -static void vcpu_worker(struct perf_test_vcpu_args 
*vcpu_args) +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; struct kvm_run *run; @@ -72,10 +72,10 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, int i; /* - * Add the dummy memslot just below the perf_test_util memslot, which is + * Add the dummy memslot just below the memstress memslot, which is * at the top of the guest physical address space. */ - gpa = perf_test_args.gpa - pages * vm->page_size; + gpa = memstress_args.gpa - pages * vm->page_size; for (i = 0; i < nr_modifications; i++) { usleep(delay); @@ -87,8 +87,8 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, } struct test_params { - useconds_t memslot_modification_delay; - uint64_t nr_memslot_modifications; + useconds_t delay; + uint64_t nr_iterations; bool partition_vcpu_memory_access; }; @@ -97,25 +97,24 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vm *vm; - vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, + vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); pr_info("Finished creating vCPUs\n"); - perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); + memstress_start_vcpu_threads(nr_vcpus, vcpu_worker); pr_info("Started all vCPUs\n"); - add_remove_memslot(vm, p->memslot_modification_delay, - p->nr_memslot_modifications); + add_remove_memslot(vm, p->delay, p->nr_iterations); run_vcpus = false; - perf_test_join_vcpu_threads(nr_vcpus); + memstress_join_vcpu_threads(nr_vcpus); pr_info("All vCPU threads joined\n"); - perf_test_destroy_vm(vm); + memstress_destroy_vm(vm); } static void help(char *name) @@ -144,9 +143,8 @@ int main(int argc, char *argv[]) int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); int opt; struct test_params p = { - .memslot_modification_delay = 0, - .nr_memslot_modifications = - DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, + .delay = 0, + 
.nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS, .partition_vcpu_memory_access = true }; @@ -158,16 +156,14 @@ int main(int argc, char *argv[]) guest_modes_cmdline(optarg); break; case 'd': - p.memslot_modification_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(p.memslot_modification_delay >= 0, - "A negative delay is not supported."); + p.delay = atoi_non_negative("Delay", optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'v': - nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + nr_vcpus = atoi_positive("Number of vCPUs", optarg); + TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; @@ -175,7 +171,7 @@ int main(int argc, char *argv[]) p.partition_vcpu_memory_access = false; break; case 'i': - p.nr_memslot_modifications = atoi(optarg); + p.nr_iterations = atoi_positive("Number of iterations", optarg); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 44995446d942..36b20abfb948 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -277,7 +277,6 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, TEST_ASSERT(data->hva_slots, "malloc() fail"); data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); - ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", max_mem_slots - 1, data->pages_per_slot, rempages); @@ -885,40 +884,28 @@ static bool parse_args(int argc, char *argv[], map_unmap_verify = true; break; case 's': - targs->nslots = atoi(optarg); + targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 0 && targs->nslots != -1) { pr_info("Slot count cap has to be positive or -1 for no cap\n"); return false; } break; case 'f': - targs->tfirst = atoi(optarg); - if 
(targs->tfirst < 0) { - pr_info("First test to run has to be non-negative\n"); - return false; - } + targs->tfirst = atoi_non_negative("First test", optarg); break; case 'e': - targs->tlast = atoi(optarg); - if (targs->tlast < 0 || targs->tlast >= NTESTS) { + targs->tlast = atoi_non_negative("Last test", optarg); + if (targs->tlast >= NTESTS) { pr_info("Last test to run has to be non-negative and less than %zu\n", NTESTS); return false; } break; case 'l': - targs->seconds = atoi(optarg); - if (targs->seconds < 0) { - pr_info("Test length in seconds has to be non-negative\n"); - return false; - } + targs->seconds = atoi_non_negative("Test length", optarg); break; case 'r': - targs->runs = atoi(optarg); - if (targs->runs <= 0) { - pr_info("Runs per test has to be positive\n"); - return false; - } + targs->runs = atoi_positive("Runs per test", optarg); break; } } @@ -1007,9 +994,6 @@ int main(int argc, char *argv[]) struct test_result rbestslottime; int tctr; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - if (!parse_args(argc, argv, &targs)) return -1; diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 6f88da7e60be..3045fdf9bdf5 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -205,9 +205,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); @@ -224,7 +221,6 @@ int main(int argc, char *argv[]) * CPU affinity. 
*/ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - ucall_init(vm, NULL); pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9113696d5178..3fd81e58f40c 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -760,8 +760,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index 19486084eb30..e41e2cb8ffa9 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -296,8 +296,6 @@ int main(int argc, char *argv[]) bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); int idx; - setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 3fdb6e2598eb..2ddde41c44ba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -231,9 +231,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(testlist)); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 0d55f508d595..2ef1d1b72ce4 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -392,9 +392,6 @@ int main(int argc, char *argv[]) int i, loops; #endif - /* 
Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - #ifdef __x86_64__ /* * FIXME: the zero-memslot test fails on aarch64 and s390x because @@ -407,7 +404,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ if (argc > 1) - loops = atoi(argv[1]); + loops = atoi_positive("Number of iterations", argv[1]); else loops = 10; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index db8967f1a17b..c87f38712073 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -266,7 +266,6 @@ int main(int ac, char **av) gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); - ucall_init(vm, NULL); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 1c274933912b..7f5b330b6a1b 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -121,7 +121,6 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_main); check_preconditions(vcpu); - ucall_init(vm, NULL); enter_guest(vcpu); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index dadcbad10a1d..21de6ae42086 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -39,11 +39,6 @@ #define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) #define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) -#define TILE_CPUID 0x1d -#define XSTATE_CPUID 0xd -#define TILE_PALETTE_CPUID_SUBLEAVE 0x1 -#define XSTATE_USER_STATE_SUBLEAVE 0x0 - #define XSAVE_HDR_OFFSET 512 struct xsave_data { @@ -129,71 +124,26 @@ static bool 
check_xsave_supports_xtile(void) return __xgetbv(0) & XFEATURE_MASK_XTILE; } -static bool enum_xtile_config(void) -{ - u32 eax, ebx, ecx, edx; - - __cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx || !ecx) - return false; - - xtile.max_names = ebx >> 16; - if (xtile.max_names < NUM_TILES) - return false; - - xtile.bytes_per_tile = eax >> 16; - if (xtile.bytes_per_tile < TILE_SIZE) - return false; - - xtile.bytes_per_row = ebx; - xtile.max_rows = ecx; - - return true; -} - -static bool enum_xsave_tile(void) -{ - u32 eax, ebx, ecx, edx; - - __cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx); - if (!eax || !ebx) - return false; - - xtile.xsave_offset = ebx; - xtile.xsave_size = eax; - - return true; -} - -static bool check_xsave_size(void) -{ - u32 eax, ebx, ecx, edx; - bool valid = false; - - __cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx); - if (ebx && ebx <= XSAVE_SIZE) - valid = true; - - return valid; -} - -static bool check_xtile_info(void) +static void check_xtile_info(void) { - bool ret = false; - - if (!check_xsave_size()) - return ret; - - if (!enum_xsave_tile()) - return ret; - - if (!enum_xtile_config()) - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); - if (sizeof(struct tile_data) >= xtile.xsave_size) - ret = true; + xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET); + GUEST_ASSERT(xtile.xsave_offset == 2816); + xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE); + GUEST_ASSERT(xtile.xsave_size == 8192); + GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size); - return ret; + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS)); + xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS); + GUEST_ASSERT(xtile.max_names == 8); + xtile.bytes_per_tile = 
this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE); + GUEST_ASSERT(xtile.bytes_per_tile == 1024); + xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW); + GUEST_ASSERT(xtile.bytes_per_row == 64); + xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS); + GUEST_ASSERT(xtile.max_rows == 16); } static void set_tilecfg(struct tile_config *cfg) @@ -238,16 +188,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, { init_regs(); check_cpuid_xsave(); - GUEST_ASSERT(check_xsave_supports_xtile()); - GUEST_ASSERT(check_xtile_info()); - - /* check xtile configs */ - GUEST_ASSERT(xtile.xsave_offset == 2816); - GUEST_ASSERT(xtile.xsave_size == 8192); - GUEST_ASSERT(xtile.max_names == 8); - GUEST_ASSERT(xtile.bytes_per_tile == 1024); - GUEST_ASSERT(xtile.bytes_per_row == 64); - GUEST_ASSERT(xtile.max_rows == 16); + check_xsave_supports_xtile(); + check_xtile_info(); GUEST_SYNC(1); /* xfd=0, enable amx */ @@ -317,8 +259,9 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); - /* Get xsave/restore max size */ - xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx; + TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE), + "KVM should enumerate max XSAVE size when XSAVE is supported"); + xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE); run = vcpu->run; vcpu_regs_get(vcpu, &regs1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index a6aeee2e62e4..2fc3ad9c887e 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -43,15 +43,6 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) } -static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid) -{ - u32 eax, ebx, ecx, edx; - - cpuid(0x40000000, &eax, &ebx, &ecx, &edx); - - GUEST_ASSERT(eax == 0x40000001); -} - static void 
guest_main(struct kvm_cpuid2 *guest_cpuid) { GUEST_SYNC(1); @@ -60,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - test_cpuid_40000000(guest_cpuid); + GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); GUEST_DONE(); } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 4208487652f8..1027a671c7d3 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -57,9 +57,6 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c deleted file mode 100644 index 236e11755ba6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ /dev/null @@ -1,193 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020, Google LLC. - * - * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability. - */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - -#include "test_util.h" -#include "kvm_util.h" -#include "vmx.h" - -#define MAXPHYADDR 36 - -#define MEM_REGION_GVA 0x0000123456789000 -#define MEM_REGION_GPA 0x0000000700000000 -#define MEM_REGION_SLOT 10 -#define MEM_REGION_SIZE PAGE_SIZE - -static void guest_code(void) -{ - __asm__ __volatile__("flds (%[addr])" - :: [addr]"r"(MEM_REGION_GVA)); - - GUEST_DONE(); -} - -/* - * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, - * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". 
- */ -#define GET_RM(insn_byte) (insn_byte & 0x7) -#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3) -#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6) - -/* Ensure we are dealing with a simple 2-byte flds instruction. */ -static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) -{ - return insn_size >= 2 && - insn_bytes[0] == 0xd9 && - GET_REG(insn_bytes[1]) == 0x0 && - GET_MOD(insn_bytes[1]) == 0x0 && - /* Ensure there is no SIB byte. */ - GET_RM(insn_bytes[1]) != 0x4 && - /* Ensure there is no displacement byte. */ - GET_RM(insn_bytes[1]) != 0x5; -} - -static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct kvm_regs regs; - uint8_t *insn_bytes; - uint8_t insn_size; - uint64_t flags; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Unexpected suberror: %u", - run->emulation_failure.suberror); - - if (run->emulation_failure.ndata >= 1) { - flags = run->emulation_failure.flags; - if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) && - run->emulation_failure.ndata >= 3) { - insn_size = run->emulation_failure.insn_size; - insn_bytes = run->emulation_failure.insn_bytes; - - TEST_ASSERT(insn_size <= 15 && insn_size > 0, - "Unexpected instruction size: %u", - insn_size); - - TEST_ASSERT(is_flds(insn_bytes, insn_size), - "Unexpected instruction. Expected 'flds' (0xd9 /0)"); - - /* - * If is_flds() succeeded then the instruction bytes - * contained an flds instruction that is 2-bytes in - * length (ie: no prefix, no SIB, no displacement). 
- */ - vcpu_regs_get(vcpu, &regs); - regs.rip += 2; - vcpu_regs_set(vcpu, &regs); - } - } -} - -static void do_guest_assert(struct ucall *uc) -{ - REPORT_GUEST_ASSERT(*uc); -} - -static void check_for_guest_assert(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (vcpu->run->exit_reason == KVM_EXIT_IO && - get_ucall(vcpu, &uc) == UCALL_ABORT) { - do_guest_assert(&uc); - } -} - -static void process_ucall_done(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - check_for_guest_assert(vcpu); - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, - "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", - uc.cmd, UCALL_DONE); -} - -static uint64_t process_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - struct ucall uc; - - TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s)", - run->exit_reason, - exit_reason_str(run->exit_reason)); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - break; - case UCALL_ABORT: - do_guest_assert(&uc); - break; - case UCALL_DONE: - process_ucall_done(vcpu); - break; - default: - TEST_ASSERT(false, "Unexpected ucall"); - } - - return uc.cmd; -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t gpa, pte; - uint64_t *hva; - int rc; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); - - rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); - TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); - vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); - - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - MEM_REGION_GPA, MEM_REGION_SLOT, - MEM_REGION_SIZE / PAGE_SIZE, 0); - 
gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, - MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); - virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); - hva = addr_gpa2hva(vm, MEM_REGION_GPA); - memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA); - vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36)); - - vcpu_run(vcpu); - process_exit_on_emulation_error(vcpu); - vcpu_run(vcpu); - - TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE"); - - kvm_vm_free(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c new file mode 100644 index 000000000000..37c61f712fd5 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + * + * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" + +#define MMIO_GPA 0x700000000 +#define MMIO_GVA MMIO_GPA + +static void guest_code(void) +{ + /* Execute flds with an MMIO address to force KVM to emulate it. 
*/ + flds(MMIO_GVA); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + virt_map(vm, MMIO_GVA, MMIO_GPA, 1); + + vcpu_run(vcpu); + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h new file mode 100644 index 000000000000..e43a7df25f2c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_FLDS_EMULATION_H +#define SELFTEST_KVM_FLDS_EMULATION_H + +#include "kvm_util.h" + +#define FLDS_MEM_EAX ".byte 0xd9, 0x00" + +/* + * flds is an instruction that the KVM instruction emulator is known not to + * support. This can be used in guest code along with a mechanism to force + * KVM to emulate the instruction (e.g. by providing an MMIO address) to + * exercise emulation failures. 
+ */ +static inline void flds(uint64_t address) +{ + __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); +} + +static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + uint8_t *insn_bytes; + uint64_t flags; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Unexpected exit reason: %u (%s)", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Unexpected suberror: %u", + run->emulation_failure.suberror); + + flags = run->emulation_failure.flags; + TEST_ASSERT(run->emulation_failure.ndata >= 3 && + flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES, + "run->emulation_failure is missing instruction bytes"); + + TEST_ASSERT(run->emulation_failure.insn_size >= 2, + "Expected a 2-byte opcode for 'flds', got %d bytes", + run->emulation_failure.insn_size); + + insn_bytes = run->emulation_failure.insn_bytes; + TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + insn_bytes[0], insn_bytes[1]); + + vcpu_regs_get(vcpu, &regs); + regs.rip += 2; + vcpu_regs_set(vcpu, &regs); +} + +#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index e804eb08dff9..5c27efbf405e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -134,9 +134,6 @@ int main(int argc, char *argv[]) const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 
05b32e550a80..2b6d455acf8a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -18,6 +18,7 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address, uint64_t *hv_status) { + uint64_t error_code; uint8_t vector; /* Note both the hypercall and the "asm safe" clobber r9-r11. */ @@ -25,7 +26,7 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, KVM_ASM_SAFE("vmcall") : "=a" (*hv_status), "+c" (control), "+d" (input_address), - KVM_ASM_SAFE_OUTPUTS(vector) + KVM_ASM_SAFE_OUTPUTS(vector, error_code) : [output_address] "r"(output_address), "a" (-EFAULT) : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 59ffe7fd354f..ea0978f22db8 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,10 +241,10 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi(optarg); + reclaim_period_ms = atoi_non_negative("Reclaim period", optarg); break; case 't': - token = atoi(optarg); + token = atoi_paranoid(optarg); break; case 'r': reboot_permissions = true; @@ -257,7 +257,6 @@ int main(int argc, char **argv) } TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); - TEST_REQUIRE(reclaim_period_ms > 0); __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 76417c7d687b..310a104d94f0 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,9 +72,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; uint64_t 
msr_platform_info; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index ea4e259a1e2e..2de98fce7edd 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -21,29 +21,6 @@ #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_ebx { - struct { - unsigned int no_unhalted_core_cycles:1; - unsigned int no_instructions_retired:1; - unsigned int no_unhalted_reference_cycles:1; - unsigned int no_llc_reference:1; - unsigned int no_llc_misses:1; - unsigned int no_branch_instruction_retired:1; - unsigned int no_branch_misses_retired:1; - } split; - unsigned int full; -}; - /* End of stuff taken from perf_event.h. */ /* Oddly, this isn't in perf_event.h. */ @@ -380,46 +357,31 @@ static void test_pmu_config_disable(void (*guest_code)(void)) } /* - * Check for a non-zero PMU version, at least one general-purpose - * counter per logical processor, an EBX bit vector of length greater - * than 5, and EBX[5] clear. - */ -static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry) -{ - union cpuid10_eax eax = { .full = entry->eax }; - union cpuid10_ebx ebx = { .full = entry->ebx }; - - return eax.split.version_id && eax.split.num_counters > 0 && - eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && - !ebx.split.no_branch_instruction_retired; -} - -/* - * Note that CPUID leaf 0xa is Intel-specific. This leaf should be - * clear on AMD hardware. 
+ * On Intel, check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, and support for counting the number of branch + * instructions retired. */ static bool use_intel_pmu(void) { - const struct kvm_cpuid_entry2 *entry; - - entry = kvm_get_supported_cpuid_entry(0xa); - return is_intel_cpu() && check_intel_pmu_leaf(entry); + return is_intel_cpu() && + kvm_cpu_property(X86_PROPERTY_PMU_VERSION) && + kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) && + kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t eax) +static bool is_zen1(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; + return family == 0x17 && model <= 0x0f; } -static bool is_zen2(uint32_t eax) +static bool is_zen2(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x17 && - x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; + return family == 0x17 && model >= 0x30 && model <= 0x3f; } -static bool is_zen3(uint32_t eax) +static bool is_zen3(uint32_t family, uint32_t model) { - return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; + return family == 0x19 && model <= 0x0f; } /* @@ -432,13 +394,13 @@ static bool is_zen3(uint32_t eax) */ static bool use_amd_pmu(void) { - const struct kvm_cpuid_entry2 *entry; + uint32_t family = kvm_cpu_family(); + uint32_t model = kvm_cpu_model(); - entry = kvm_get_supported_cpuid_entry(1); return is_amd_cpu() && - (is_zen1(entry->eax) || - is_zen2(entry->eax) || - is_zen3(entry->eax)); + (is_zen1(family, model) || + is_zen2(family, model) || + is_zen3(family, model)); } int main(int argc, char *argv[]) @@ -447,9 +409,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c 
b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 2bb08bf2125d..a284fcef6ed7 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -82,9 +82,6 @@ int main(int argc, char *argv[]) uint64_t cr4; int rc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - /* * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and * use it to verify all supported CR4 bits can be set prior to defining diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c new file mode 100644 index 000000000000..06edf00a97d6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + * + * Test that KVM emulates instructions in response to EPT violations when + * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ + +#include "flds_emulation.h" + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define MAXPHYADDR 36 + +#define MEM_REGION_GVA 0x0000123456789000 +#define MEM_REGION_GPA 0x0000000700000000 +#define MEM_REGION_SLOT 10 +#define MEM_REGION_SIZE PAGE_SIZE + +static void guest_code(bool tdp_enabled) +{ + uint64_t error_code; + uint64_t vector; + + vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); + + /* + * When TDP is enabled, flds will trigger an emulation failure, exit to + * userspace, and then the selftest host "VMM" skips the instruction. + * + * When TDP is disabled, no instruction emulation is required so flds + * should generate #PF(RSVD). 
+ */ + if (tdp_enabled) { + GUEST_ASSERT(!vector); + } else { + GUEST_ASSERT_EQ(vector, PF_VECTOR); + GUEST_ASSERT(error_code & PFERR_RSVD_MASK); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + uint64_t *pte; + uint64_t *hva; + uint64_t gpa; + int rc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); + + rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); + TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + MEM_REGION_GPA, MEM_REGION_SLOT, + MEM_REGION_SIZE / PAGE_SIZE, 0); + gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, + MEM_REGION_GPA, MEM_REGION_SLOT); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); + memset(hva, 0, PAGE_SIZE); + + pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); + *pte |= BIT_ULL(MAXPHYADDR); + + vcpu_run(vcpu); + + /* + * When TDP is enabled, KVM must emulate in response to the guest physical + * address that is illegal from the guest's perspective, but is legal + * from hardware's perspective. This should result in an emulation + * failure exit to userspace since KVM doesn't support emulating flds. 
+ */ + if (kvm_is_tdp_enabled()) { + handle_flds_emulation_failure_exit(vcpu); + vcpu_run(vcpu); + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 1f136a81858e..cb38a478e1f6 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; int stage, stage_reported; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index e637d7736012..e497ace629c1 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -194,9 +194,6 @@ done: int main(int argc, char *argv[]) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 9b6db0b0b13e..d2f9b5bdfab2 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -90,9 +90,6 @@ int main(int argc, char *argv[]) struct kvm_vcpu_events events; int rv, cap; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c 
b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 7316521428f8..91076c9787b4 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -56,9 +56,6 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index a4f06370a245..25fa55344a10 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -733,16 +733,98 @@ static void test_msr_permission_bitmap(void) kvm_vm_free(vm); } -int main(int argc, char *argv[]) +#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ +({ \ + int r = __vm_ioctl(vm, cmd, arg); \ + \ + if (flag & valid_mask) \ + TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \ + else \ + TEST_ASSERT(r == -1 && errno == EINVAL, \ + "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \ + #cmd, flag, r, errno, strerror(errno)); \ +}) + +static void run_user_space_msr_flag_test(struct kvm_vm *vm) { - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); + struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR }; + int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); + TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); + + for (i = 0; i < nflags; i++) { + cap.args[0] = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap, + BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK); + } +} + +static void run_msr_filter_flag_test(struct kvm_vm *vm) +{ + u64 deny_bits = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + .ranges = { + { + .flags = 
KVM_MSR_FILTER_READ, + .nmsrs = 1, + .base = 0, + .bitmap = (uint8_t *)&deny_bits, + }, + }, + }; + int nflags; + int rc; + int i; + + rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); + TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); + + nflags = sizeof(filter.flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK); + } + filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW; + nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE; + for (i = 0; i < nflags; i++) { + filter.ranges[0].flags = BIT_ULL(i); + test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter, + BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK); + } +} + +/* Test that attempts to write to the unused bits in a flag fail. */ +static void test_user_exit_msr_flags(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ + run_user_space_msr_flag_test(vm); + + /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. 
*/ + run_msr_filter_flag_test(vm); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ test_msr_filter_allow(); test_msr_filter_deny(); test_msr_permission_bitmap(); + test_user_exit_msr_flags(); + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 2d8c23d639f7..f0456fb031b1 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -78,6 +78,7 @@ int main(int argc, char *argv[]) bool done = false; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 069589c52f41..c280ba1e6572 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -20,16 +20,6 @@ #define PMU_CAP_FW_WRITES (1ULL << 13) #define PMU_CAP_LBR_FMT 0x3f -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - union perf_capabilities { struct { u64 lbr_format:6; @@ -53,11 +43,9 @@ static void guest_code(void) int main(int argc, char *argv[]) { - const struct kvm_cpuid_entry2 *entry_a_0; struct kvm_vm *vm; struct kvm_vcpu *vcpu; int ret; - union cpuid10_eax eax; union perf_capabilities host_cap; uint64_t val; @@ -69,11 +57,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); - TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa); - entry_a_0 = kvm_get_supported_cpuid_entry(0xa); - - eax.full = entry_a_0->eax; - __TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU"); + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + 
TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); /* testcase 1, set capabilities when we have PDCM bit */ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 6f7a5ef66718..d7d37dae3eeb 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -114,7 +114,9 @@ static void test_icr(struct xapic_vcpu *x) * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; - for (i = vcpu->id + 1; i < 0xff; i++) { + for (i = 0; i < 0xff; i++) { + if (i == vcpu->id) + continue; for (j = 0; j < 8; j++) __test_icr(x, i << (32 + 24) | icr | (j << 8)); } |