diff options
Diffstat (limited to 'tools/testing/selftests/kvm')
| -rw-r--r-- | tools/testing/selftests/kvm/.gitignore | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/Makefile | 329 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/Makefile.kvm | 356 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/access_tracking_perf_test.c | 281 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arch_timer.c | 7 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/aarch32_id_regs.c (renamed from tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c) | 14 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/arch_timer.c (renamed from tools/testing/selftests/kvm/aarch64/arch_timer.c) | 13 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 1059 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/at.c | 166 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/debug-exceptions.c (renamed from tools/testing/selftests/kvm/aarch64/debug-exceptions.c) | 30 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/external_aborts.c | 415 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/get-reg-list.c (renamed from tools/testing/selftests/kvm/aarch64/get-reg-list.c) | 296 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/hello_el2.c | 71 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/host_sve.c | 127 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/hypercalls.c (renamed from tools/testing/selftests/kvm/aarch64/hypercalls.c) | 52 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/kvm-uuid.c | 70 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/no-vgic-v3.c | 177 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/page_fault_test.c (renamed from tools/testing/selftests/kvm/aarch64/page_fault_test.c) | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/psci_test.c (renamed from tools/testing/selftests/kvm/aarch64/psci_test.c) | 109 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/sea_to_user.c | 331 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/set_id_regs.c (renamed from tools/testing/selftests/kvm/aarch64/set_id_regs.c) | 303 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/smccc_filter.c (renamed from tools/testing/selftests/kvm/aarch64/smccc_filter.c) | 17 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/vcpu_width_config.c (renamed from tools/testing/selftests/kvm/aarch64/vcpu_width_config.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_init.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_init.c) | 261 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_irq.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_irq.c) | 314 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c (renamed from tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c) | 15 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/arm64/vpmu_counter_access.c (renamed from tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c) | 102 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/coalesced_io_test.c | 236 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/config | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/dirty_log_perf_test.c | 35 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/dirty_log_test.c | 524 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/get-reg-list.c | 9 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/guest_memfd_test.c | 367 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/guest_print_test.c | 19 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/hardware_disable_test.c | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/arch_timer.h (renamed from tools/testing/selftests/kvm/include/aarch64/arch_timer.h) | 42 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/delay.h (renamed from tools/testing/selftests/kvm/include/aarch64/delay.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/gic.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic.h) | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/gic_v3.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic_v3.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/gic_v3_its.h (renamed from tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h) | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h | 10 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/processor.h (renamed from tools/testing/selftests/kvm/include/aarch64/processor.h) | 179 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/spinlock.h (renamed from tools/testing/selftests/kvm/include/aarch64/spinlock.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/ucall.h (renamed from tools/testing/selftests/kvm/include/aarch64/ucall.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/arm64/vgic.h (renamed from tools/testing/selftests/kvm/include/aarch64/vgic.h) | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/kvm_syscalls.h | 81 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/kvm_util.h | 226 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/loongarch/arch_timer.h | 85 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/loongarch/processor.h | 220 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/loongarch/ucall.h | 20 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/lru_gen_util.h | 51 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/numaif.h | 110 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/riscv/processor.h | 24 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/debug_print.h | 69 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/diag318_test_handler.h (renamed from tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/facility.h | 50 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/processor.h (renamed from tools/testing/selftests/kvm/include/s390x/processor.h) | 11 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/sie.h | 240 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/s390/ucall.h (renamed from tools/testing/selftests/kvm/include/s390x/ucall.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/test_util.h | 22 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/apic.h (renamed from tools/testing/selftests/kvm/include/x86_64/apic.h) | 31 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/evmcs.h (renamed from tools/testing/selftests/kvm/include/x86_64/evmcs.h) | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/hyperv.h (renamed from tools/testing/selftests/kvm/include/x86_64/hyperv.h) | 21 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/kvm_util_arch.h (renamed from tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/mce.h (renamed from tools/testing/selftests/kvm/include/x86_64/mce.h) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/pmu.h (renamed from tools/testing/selftests/kvm/include/x86_64/pmu.h) | 26 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/processor.h (renamed from tools/testing/selftests/kvm/include/x86_64/processor.h) | 146 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/sev.h (renamed from tools/testing/selftests/kvm/include/x86_64/sev.h) | 53 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/svm.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm.h) | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/svm_util.h (renamed from tools/testing/selftests/kvm/include/x86_64/svm_util.h) | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/ucall.h (renamed from tools/testing/selftests/kvm/include/x86_64/ucall.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/include/x86/vmx.h (renamed from tools/testing/selftests/kvm/include/x86_64/vmx.h) | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/irqfd_test.c | 143 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/kvm_binary_stats_test.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/kvm_create_max_vcpus.c | 28 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic.c) | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_private.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_private.h) | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_v3.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.c) | 22 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c) | 19 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/handlers.S (renamed from tools/testing/selftests/kvm/lib/aarch64/handlers.S) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/processor.c (renamed from tools/testing/selftests/kvm/lib/aarch64/processor.c) | 193 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/spinlock.c (renamed from tools/testing/selftests/kvm/lib/aarch64/spinlock.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/ucall.c (renamed from tools/testing/selftests/kvm/lib/aarch64/ucall.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/arm64/vgic.c (renamed from tools/testing/selftests/kvm/lib/aarch64/vgic.c) | 64 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 538 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/loongarch/exception.S | 65 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/loongarch/processor.c | 389 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/loongarch/ucall.c | 38 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/lru_gen_util.c | 387 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/memstress.c | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/riscv/handlers.S | 139 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/riscv/processor.c | 68 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c (renamed from tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/facility.c | 14 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/processor.c (renamed from tools/testing/selftests/kvm/lib/s390x/processor.c) | 15 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/s390/ucall.c (renamed from tools/testing/selftests/kvm/lib/s390x/ucall.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/sparsebit.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/test_util.c | 49 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/userfaultfd_util.c | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/apic.c (renamed from tools/testing/selftests/kvm/lib/x86_64/apic.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/handlers.S (renamed from tools/testing/selftests/kvm/lib/x86_64/handlers.S) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/hyperv.c | 113 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/memstress.c (renamed from tools/testing/selftests/kvm/lib/x86_64/memstress.c) | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/pmu.c | 80 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/processor.c (renamed from tools/testing/selftests/kvm/lib/x86_64/processor.c) | 248 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/sev.c (renamed from tools/testing/selftests/kvm/lib/x86_64/sev.c) | 76 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/svm.c (renamed from tools/testing/selftests/kvm/lib/x86_64/svm.c) | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/ucall.c (renamed from tools/testing/selftests/kvm/lib/x86_64/ucall.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86/vmx.c (renamed from tools/testing/selftests/kvm/lib/x86_64/vmx.c) | 13 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86_64/hyperv.c | 46 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/lib/x86_64/pmu.c | 31 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/loongarch/arch_timer.c | 200 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/memslot_modification_stress_test.c | 28 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/memslot_perf_test.c | 21 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/mmu_stress_test.c (renamed from tools/testing/selftests/kvm/max_guest_memory_test.c) | 182 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/pre_fault_memory_test.c | 239 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/riscv/arch_timer.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/riscv/ebreak_test.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/riscv/get-reg-list.c | 266 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/riscv/sbi_pmu_test.c | 107 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/rseq_test.c | 31 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/cmma_test.c (renamed from tools/testing/selftests/kvm/s390x/cmma_test.c) | 13 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/config | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c | 301 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/debug_test.c (renamed from tools/testing/selftests/kvm/s390x/debug_test.c) | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/memop.c (renamed from tools/testing/selftests/kvm/s390x/memop.c) | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/resets.c (renamed from tools/testing/selftests/kvm/s390x/resets.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/shared_zeropage_test.c (renamed from tools/testing/selftests/kvm/s390x/shared_zeropage_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/sync_regs_test.c (renamed from tools/testing/selftests/kvm/s390x/sync_regs_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/tprot.c (renamed from tools/testing/selftests/kvm/s390x/tprot.c) | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/ucontrol_test.c | 798 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/s390/user_operexec.c | 140 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/set_memory_region_test.c | 107 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/steal_time.c | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/amx_test.c (renamed from tools/testing/selftests/kvm/x86_64/amx_test.c) | 23 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/aperfmperf_test.c | 213 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/apic_bus_clock_test.c | 194 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/cpuid_test.c) | 69 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c | 100 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/debug_regs.c (renamed from tools/testing/selftests/kvm/x86_64/debug_regs.c) | 13 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c (renamed from tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c) | 6 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c (renamed from tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/fastops_test.c | 209 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/feature_msrs_test.c | 113 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/fix_hypercall_test.c (renamed from tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/flds_emulation.h (renamed from tools/testing/selftests/kvm/x86_64/flds_emulation.h) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hwcr_msr_test.c (renamed from tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_clock.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_clock.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_cpuid.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c) | 70 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_evmcs.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_features.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_features.c) | 18 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_ipi.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_ipi.c) | 24 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_svm_test.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c (renamed from tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/kvm_buslock_test.c | 135 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/kvm_clock_test.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_clock_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/kvm_pv_test.c (renamed from tools/testing/selftests/kvm/x86_64/kvm_pv_test.c) | 38 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c (renamed from tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c) | 22 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/monitor_mwait_test.c (renamed from tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c) | 115 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/msrs_test.c | 489 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c) | 42 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nested_emulation_test.c | 146 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nested_exceptions_test.c (renamed from tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c | 116 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c) | 79 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c) | 48 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/nx_huge_pages_test.c (renamed from tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c) | 4 | ||||
| -rwxr-xr-x | tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh (renamed from tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/platform_info_test.c (renamed from tools/testing/selftests/kvm/x86_64/platform_info_test.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/pmu_counters_test.c (renamed from tools/testing/selftests/kvm/x86_64/pmu_counters_test.c) | 257 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/pmu_event_filter_test.c (renamed from tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c) | 39 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/private_mem_conversions_test.c (renamed from tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c) | 9 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c (renamed from tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/recalc_apic_map_test.c (renamed from tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/set_boot_cpu_id.c (renamed from tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c) | 16 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/set_sregs_test.c (renamed from tools/testing/selftests/kvm/x86_64/set_sregs_test.c) | 63 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/sev_init2_tests.c (renamed from tools/testing/selftests/kvm/x86_64/sev_init2_tests.c) | 13 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/sev_migrate_tests.c (renamed from tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/sev_smoke_test.c (renamed from tools/testing/selftests/kvm/x86_64/sev_smoke_test.c) | 117 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c (renamed from tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/smm_test.c (renamed from tools/testing/selftests/kvm/x86_64/smm_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/state_test.c (renamed from tools/testing/selftests/kvm/x86_64/state_test.c) | 7 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/svm_int_ctl_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c) | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/svm_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/sync_regs_test.c (renamed from tools/testing/selftests/kvm/x86_64/sync_regs_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/triple_fault_event_test.c (renamed from tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/tsc_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/tsc_scaling_sync.c (renamed from tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/ucna_injection_test.c (renamed from tools/testing/selftests/kvm/x86_64/ucna_injection_test.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/userspace_io_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_io_test.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c (renamed from tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c) | 8 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_apic_access_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c) | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c) | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_msrs_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c | 132 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c) | 30 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xapic_ipi_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c) | 21 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xapic_state_test.c (renamed from tools/testing/selftests/kvm/x86_64/xapic_state_test.c) | 90 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c (renamed from tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c) | 19 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xen_shinfo_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c) | 26 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xen_vmcall_test.c (renamed from tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c) | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/xss_msr_test.c (renamed from tools/testing/selftests/kvm/x86_64/xss_msr_test.c) | 0 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 85 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86_64/get_msr_index_features.c | 35 |
213 files changed, 13927 insertions, 2490 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6d9381d60172..1d41a046a7bf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,3 +5,8 @@ !*.h !*.S !*.sh +!.gitignore +!config +!settings +!Makefile +!Makefile.kvm
\ No newline at end of file diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ac280dcba996..f2b223072b62 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,331 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only -include ../../../build/Build.include - -all: - top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),x86) - ARCH_DIR := x86_64 -else ifeq ($(ARCH),arm64) - ARCH_DIR := aarch64 -else ifeq ($(ARCH),s390) - ARCH_DIR := s390x -else - ARCH_DIR := $(ARCH) -endif - -LIBKVM += lib/assert.c -LIBKVM += lib/elf.c -LIBKVM += lib/guest_modes.c -LIBKVM += lib/io.c -LIBKVM += lib/kvm_util.c -LIBKVM += lib/memstress.c -LIBKVM += lib/guest_sprintf.c -LIBKVM += lib/rbtree.c -LIBKVM += lib/sparsebit.c -LIBKVM += lib/test_util.c -LIBKVM += lib/ucall_common.c -LIBKVM += lib/userfaultfd_util.c - -LIBKVM_STRING += lib/string_override.c - -LIBKVM_x86_64 += lib/x86_64/apic.c -LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/hyperv.c -LIBKVM_x86_64 += lib/x86_64/memstress.c -LIBKVM_x86_64 += lib/x86_64/pmu.c -LIBKVM_x86_64 += lib/x86_64/processor.c -LIBKVM_x86_64 += lib/x86_64/sev.c -LIBKVM_x86_64 += lib/x86_64/svm.c -LIBKVM_x86_64 += lib/x86_64/ucall.c -LIBKVM_x86_64 += lib/x86_64/vmx.c - -LIBKVM_aarch64 += lib/aarch64/gic.c -LIBKVM_aarch64 += lib/aarch64/gic_v3.c -LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c -LIBKVM_aarch64 += lib/aarch64/handlers.S -LIBKVM_aarch64 += lib/aarch64/processor.c -LIBKVM_aarch64 += lib/aarch64/spinlock.c -LIBKVM_aarch64 += lib/aarch64/ucall.c -LIBKVM_aarch64 += lib/aarch64/vgic.c - -LIBKVM_s390x += lib/s390x/diag318_test_handler.c -LIBKVM_s390x += lib/s390x/processor.c -LIBKVM_s390x += lib/s390x/ucall.c - -LIBKVM_riscv += lib/riscv/handlers.S -LIBKVM_riscv += lib/riscv/processor.c -LIBKVM_riscv += lib/riscv/ucall.c - -# Non-compiled test targets -TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh 
- -# Compiled test targets -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features -TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test -TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test -TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test -TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test -TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test -TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test -TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id -TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test -TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test -TEST_GEN_PROGS_x86_64 += x86_64/smm_test -TEST_GEN_PROGS_x86_64 += x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test -TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync -TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test -TEST_GEN_PROGS_x86_64 += 
x86_64/userspace_io_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test -TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test -TEST_GEN_PROGS_x86_64 += x86_64/amx_test -TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test -TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test -TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test -TEST_GEN_PROGS_x86_64 += access_tracking_perf_test -TEST_GEN_PROGS_x86_64 += demand_paging_test -TEST_GEN_PROGS_x86_64 += dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_perf_test -TEST_GEN_PROGS_x86_64 += guest_memfd_test -TEST_GEN_PROGS_x86_64 += guest_print_test -TEST_GEN_PROGS_x86_64 += hardware_disable_test -TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += max_guest_memory_test -TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test -TEST_GEN_PROGS_x86_64 += memslot_perf_test -TEST_GEN_PROGS_x86_64 += rseq_test 
-TEST_GEN_PROGS_x86_64 += set_memory_region_test -TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test - -# Compiled outputs used by test targets -TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test - -TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/hypercalls -TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test -TEST_GEN_PROGS_aarch64 += aarch64/psci_test -TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter -TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config -TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq -TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress -TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access -TEST_GEN_PROGS_aarch64 += access_tracking_perf_test -TEST_GEN_PROGS_aarch64 += arch_timer -TEST_GEN_PROGS_aarch64 += demand_paging_test -TEST_GEN_PROGS_aarch64 += dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_perf_test -TEST_GEN_PROGS_aarch64 += guest_print_test -TEST_GEN_PROGS_aarch64 += get-reg-list -TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus -TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test -TEST_GEN_PROGS_aarch64 += rseq_test -TEST_GEN_PROGS_aarch64 += set_memory_region_test -TEST_GEN_PROGS_aarch64 += steal_time -TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test - -TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += s390x/sync_regs_test -TEST_GEN_PROGS_s390x += s390x/tprot -TEST_GEN_PROGS_s390x += s390x/cmma_test -TEST_GEN_PROGS_s390x += s390x/debug_test -TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test -TEST_GEN_PROGS_s390x += demand_paging_test -TEST_GEN_PROGS_s390x += dirty_log_test -TEST_GEN_PROGS_s390x += guest_print_test 
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus -TEST_GEN_PROGS_s390x += kvm_page_table_test -TEST_GEN_PROGS_s390x += rseq_test -TEST_GEN_PROGS_s390x += set_memory_region_test -TEST_GEN_PROGS_s390x += kvm_binary_stats_test - -TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test -TEST_GEN_PROGS_riscv += riscv/ebreak_test -TEST_GEN_PROGS_riscv += arch_timer -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += steal_time - -SPLIT_TESTS += arch_timer -SPLIT_TESTS += get-reg-list - -TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) -LIBKVM += $(LIBKVM_$(ARCH_DIR)) - -OVERRIDE_TARGETS = 1 - -# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most -# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, -# which causes the environment variable to override the makefile). 
-include ../lib.mk - -INSTALL_HDR_PATH = $(top_srcdir)/usr -LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch)) +# Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include -else -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include -endif -CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -D_GNU_SOURCE -fno-builtin-memcmp -fno-builtin-memcpy \ - -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ - -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ - $(KHDR_INCLUDES) -ifeq ($(ARCH),s390) - CFLAGS += -march=z10 + override ARCH := x86 endif -ifeq ($(ARCH),arm64) -tools_dir := $(top_srcdir)/tools -arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ - -ifneq ($(abs_objdir),) -arm64_hdr_outdir := $(abs_objdir)/tools/ +include Makefile.kvm else -arm64_hdr_outdir := $(tools_dir)/ -endif - -GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ -CFLAGS += -I$(GEN_HDRS) - -$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +# Empty targets for unsupported architectures +all: +clean: endif - -no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -# On s390, build the testcases KVM-enabled -pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) - -LDLIBS += -ldl -LDFLAGS += -pthread $(no-pie-option) $(pgste-option) - -LIBKVM_C := $(filter %.c,$(LIBKVM)) -LIBKVM_S := $(filter %.S,$(LIBKVM)) -LIBKVM_C_OBJ := $(patsubst %.c, 
$(OUTPUT)/%.o, $(LIBKVM_C)) -LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) -SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) -SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS)) - -TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) -TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) -TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) --include $(TEST_DEP_FILES) - -$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) - -$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ -$(TEST_GEN_PROGS_EXTENDED): %: %.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ -$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ -$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -EXTRA_CLEAN += $(GEN_HDRS) \ - $(LIBKVM_OBJS) \ - $(SPLIT_TEST_GEN_OBJ) \ - $(TEST_DEP_FILES) \ - $(TEST_GEN_OBJ) \ - cscope.* - -$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -# Compile the string overrides as freestanding to prevent the compiler from -# generating self-referential code, e.g. without "freestanding" the compiler may -# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. 
-$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ - -$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) -$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) -$(TEST_GEN_PROGS): $(LIBKVM_OBJS) -$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) -$(TEST_GEN_OBJ): $(GEN_HDRS) - -cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. -cscope: - $(RM) cscope.* - (find $(include_paths) -name '*.h' \ - -exec realpath --relative-base=$(PWD) {} \;; \ - find . -name '*.c' \ - -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files - cscope -b diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm new file mode 100644 index 000000000000..ba5c2b643efa --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -0,0 +1,356 @@ +# SPDX-License-Identifier: GPL-2.0-only +include ../../../build/Build.include + +all: + +LIBKVM += lib/assert.c +LIBKVM += lib/elf.c +LIBKVM += lib/guest_modes.c +LIBKVM += lib/io.c +LIBKVM += lib/kvm_util.c +LIBKVM += lib/lru_gen_util.c +LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c +LIBKVM += lib/rbtree.c +LIBKVM += lib/sparsebit.c +LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c + +LIBKVM_STRING += lib/string_override.c + +LIBKVM_x86 += lib/x86/apic.c +LIBKVM_x86 += lib/x86/handlers.S +LIBKVM_x86 += lib/x86/hyperv.c +LIBKVM_x86 += lib/x86/memstress.c +LIBKVM_x86 += lib/x86/pmu.c +LIBKVM_x86 += lib/x86/processor.c +LIBKVM_x86 += lib/x86/sev.c +LIBKVM_x86 += lib/x86/svm.c +LIBKVM_x86 += lib/x86/ucall.c +LIBKVM_x86 += lib/x86/vmx.c + +LIBKVM_arm64 += lib/arm64/gic.c +LIBKVM_arm64 += lib/arm64/gic_v3.c +LIBKVM_arm64 += lib/arm64/gic_v3_its.c +LIBKVM_arm64 += lib/arm64/handlers.S +LIBKVM_arm64 += lib/arm64/processor.c +LIBKVM_arm64 += lib/arm64/spinlock.c +LIBKVM_arm64 += lib/arm64/ucall.c +LIBKVM_arm64 += lib/arm64/vgic.c + +LIBKVM_s390 += lib/s390/diag318_test_handler.c +LIBKVM_s390 
+= lib/s390/processor.c +LIBKVM_s390 += lib/s390/ucall.c +LIBKVM_s390 += lib/s390/facility.c + +LIBKVM_riscv += lib/riscv/handlers.S +LIBKVM_riscv += lib/riscv/processor.c +LIBKVM_riscv += lib/riscv/ucall.c + +LIBKVM_loongarch += lib/loongarch/processor.c +LIBKVM_loongarch += lib/loongarch/ucall.c +LIBKVM_loongarch += lib/loongarch/exception.S + +# Non-compiled test targets +TEST_PROGS_x86 += x86/nx_huge_pages_test.sh + +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + +# Compiled test targets +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test +TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test +TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fastops_test +TEST_GEN_PROGS_x86 += x86/fix_hypercall_test +TEST_GEN_PROGS_x86 += x86/hwcr_msr_test +TEST_GEN_PROGS_x86 += x86/hyperv_clock +TEST_GEN_PROGS_x86 += x86/hyperv_cpuid +TEST_GEN_PROGS_x86 += x86/hyperv_evmcs +TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls +TEST_GEN_PROGS_x86 += x86/hyperv_features +TEST_GEN_PROGS_x86 += x86/hyperv_ipi +TEST_GEN_PROGS_x86 += x86/hyperv_svm_test +TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush +TEST_GEN_PROGS_x86 += x86/kvm_clock_test +TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/kvm_buslock_test +TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/msrs_test +TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test +TEST_GEN_PROGS_x86 += x86/nested_emulation_test +TEST_GEN_PROGS_x86 += x86/nested_exceptions_test 
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/platform_info_test +TEST_GEN_PROGS_x86 += x86/pmu_counters_test +TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test +TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test +TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test +TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id +TEST_GEN_PROGS_x86 += x86/set_sregs_test +TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test +TEST_GEN_PROGS_x86 += x86/smm_test +TEST_GEN_PROGS_x86 += x86/state_test +TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test +TEST_GEN_PROGS_x86 += x86/svm_vmcall_test +TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test +TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test +TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync +TEST_GEN_PROGS_x86 += x86/sync_regs_test +TEST_GEN_PROGS_x86 += x86/ucna_injection_test +TEST_GEN_PROGS_x86 += x86/userspace_io_test +TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test +TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test +TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test +TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test +TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test +TEST_GEN_PROGS_x86 += x86/xapic_ipi_test +TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test +TEST_GEN_PROGS_x86 += x86/xss_msr_test +TEST_GEN_PROGS_x86 += x86/debug_regs +TEST_GEN_PROGS_x86 += x86/tsc_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test +TEST_GEN_PROGS_x86 += x86/xen_shinfo_test +TEST_GEN_PROGS_x86 += x86/xen_vmcall_test +TEST_GEN_PROGS_x86 += x86/sev_init2_tests +TEST_GEN_PROGS_x86 += x86/sev_migrate_tests +TEST_GEN_PROGS_x86 
+= x86/sev_smoke_test +TEST_GEN_PROGS_x86 += x86/amx_test +TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test +TEST_GEN_PROGS_x86 += x86/triple_fault_event_test +TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += x86/aperfmperf_test +TEST_GEN_PROGS_x86 += access_tracking_perf_test +TEST_GEN_PROGS_x86 += coalesced_io_test +TEST_GEN_PROGS_x86 += dirty_log_perf_test +TEST_GEN_PROGS_x86 += guest_memfd_test +TEST_GEN_PROGS_x86 += hardware_disable_test +TEST_GEN_PROGS_x86 += memslot_modification_stress_test +TEST_GEN_PROGS_x86 += memslot_perf_test +TEST_GEN_PROGS_x86 += mmu_stress_test +TEST_GEN_PROGS_x86 += rseq_test +TEST_GEN_PROGS_x86 += steal_time +TEST_GEN_PROGS_x86 += system_counter_offset_test +TEST_GEN_PROGS_x86 += pre_fault_memory_test + +# Compiled outputs used by test targets +TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test + +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs +TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/at +TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hello_el2 +TEST_GEN_PROGS_arm64 += arm64/host_sve +TEST_GEN_PROGS_arm64 += arm64/hypercalls +TEST_GEN_PROGS_arm64 += arm64/external_aborts +TEST_GEN_PROGS_arm64 += arm64/page_fault_test +TEST_GEN_PROGS_arm64 += arm64/psci_test +TEST_GEN_PROGS_arm64 += arm64/sea_to_user +TEST_GEN_PROGS_arm64 += arm64/set_id_regs +TEST_GEN_PROGS_arm64 += arm64/smccc_filter +TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config +TEST_GEN_PROGS_arm64 += arm64/vgic_init +TEST_GEN_PROGS_arm64 += arm64/vgic_irq +TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress +TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access +TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid +TEST_GEN_PROGS_arm64 += access_tracking_perf_test +TEST_GEN_PROGS_arm64 += arch_timer +TEST_GEN_PROGS_arm64 += coalesced_io_test +TEST_GEN_PROGS_arm64 += dirty_log_perf_test +TEST_GEN_PROGS_arm64 += 
get-reg-list +TEST_GEN_PROGS_arm64 += guest_memfd_test +TEST_GEN_PROGS_arm64 += memslot_modification_stress_test +TEST_GEN_PROGS_arm64 += memslot_perf_test +TEST_GEN_PROGS_arm64 += mmu_stress_test +TEST_GEN_PROGS_arm64 += rseq_test +TEST_GEN_PROGS_arm64 += steal_time + +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop +TEST_GEN_PROGS_s390 += s390/resets +TEST_GEN_PROGS_s390 += s390/sync_regs_test +TEST_GEN_PROGS_s390 += s390/tprot +TEST_GEN_PROGS_s390 += s390/cmma_test +TEST_GEN_PROGS_s390 += s390/debug_test +TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test +TEST_GEN_PROGS_s390 += s390/shared_zeropage_test +TEST_GEN_PROGS_s390 += s390/ucontrol_test +TEST_GEN_PROGS_s390 += s390/user_operexec +TEST_GEN_PROGS_s390 += rseq_test + +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += access_tracking_perf_test +TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += dirty_log_perf_test +TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += memslot_modification_stress_test +TEST_GEN_PROGS_riscv += memslot_perf_test +TEST_GEN_PROGS_riscv += mmu_stress_test +TEST_GEN_PROGS_riscv += rseq_test +TEST_GEN_PROGS_riscv += steal_time + +TEST_GEN_PROGS_loongarch = arch_timer +TEST_GEN_PROGS_loongarch += coalesced_io_test +TEST_GEN_PROGS_loongarch += demand_paging_test +TEST_GEN_PROGS_loongarch += dirty_log_perf_test +TEST_GEN_PROGS_loongarch += dirty_log_test +TEST_GEN_PROGS_loongarch += guest_print_test +TEST_GEN_PROGS_loongarch += hardware_disable_test +TEST_GEN_PROGS_loongarch += kvm_binary_stats_test +TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus +TEST_GEN_PROGS_loongarch += kvm_page_table_test +TEST_GEN_PROGS_loongarch += memslot_modification_stress_test +TEST_GEN_PROGS_loongarch += memslot_perf_test +TEST_GEN_PROGS_loongarch += set_memory_region_test + +SPLIT_TESTS += arch_timer 
+SPLIT_TESTS += get-reg-list + +TEST_PROGS += $(TEST_PROGS_$(ARCH)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH)) +LIBKVM += $(LIBKVM_$(ARCH)) + +OVERRIDE_TARGETS = 1 + +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). +include ../lib.mk +include ../cgroup/lib/libcgroup.mk + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include +CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \ + -I ../rseq -I.. 
$(EXTRA_CFLAGS) $(KHDR_INCLUDES) +ifeq ($(ARCH),s390) + CFLAGS += -march=z10 +endif +ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif + +no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + +LDLIBS += -ldl +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) + +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O) +SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS)) + +TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) +-include $(TEST_DEP_FILES) + +$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) 
$(LIBKVM_S_OBJ)))) + +$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ +$(TEST_GEN_PROGS_EXTENDED): %: %.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ +$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ +$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(GEN_HDRS) \ + $(LIBKVM_OBJS) \ + $(SPLIT_TEST_GEN_OBJ) \ + $(TEST_DEP_FILES) \ + $(TEST_GEN_OBJ) \ + cscope.* + +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + +$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) +$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) +$(TEST_GEN_PROGS): $(LIBKVM_OBJS) +$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) +$(TEST_GEN_OBJ): $(GEN_HDRS) + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. +cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . 
-name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..b058f27b2141 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -7,9 +7,11 @@ * This test measures the performance effects of KVM's access tracking. * Access tracking is driven by the MMU notifiers test_young, clear_young, and * clear_flush_young. These notifiers do not have a direct userspace API, - * however the clear_young notifier can be triggered by marking a pages as idle - * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to - * enable access tracking on guest memory. + * however the clear_young notifier can be triggered either by + * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR + * 2. adding a new MGLRU generation using the lru_gen debugfs file. + * This test leverages page_idle to enable access tracking on guest memory + * unless MGLRU is enabled, in which case MGLRU is used. * * To measure performance this test runs a VM with a configurable number of * vCPUs that each touch every page in disjoint regions of memory. Performance @@ -17,10 +19,11 @@ * predefined region. * * Note that a deterministic correctness test of access tracking is not possible - * by using page_idle as it exists today. This is for a few reasons: + * by using page_idle or MGLRU aging as it exists today. This is for a few + * reasons: * - * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This - * means subsequent guest accesses are not guaranteed to see page table + * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush. + * This means subsequent guest accesses are not guaranteed to see page table * updates made by KVM until some time in the future. * * 2. 
page_idle only operates on LRU pages. Newly allocated pages are not @@ -47,10 +50,19 @@ #include "memstress.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +#include "cgroup_util.h" +#include "lru_gen_util.h" + +static const char *TEST_MEMCG_NAME = "access_tracking_perf_test"; /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; +/* The cgroup memory controller root. Needed for lru_gen-based aging. */ +char cgroup_root[PATH_MAX]; + /* Defines what vCPU threads should do during a given iteration. */ static enum { /* Run the vCPU to access all its memory. */ @@ -65,6 +77,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access; +/* + * If the test should only warn if there are too many idle pages (i.e., it is + * expected). + * -1: Not yet set. + * 0: We do not expect too many idle pages, so FAIL if too many idle pages. + * 1: Having too many idle pages is expected, so merely print a warning if + * too many idle pages are found. + */ +static int idle_pages_warn_only = -1; + +/* Whether or not to use MGLRU instead of page_idle for access tracking */ +static bool use_lru_gen; + +/* Total number of pages to expect in the memcg after touching everything */ +static long test_pages; + +/* Last generation we found the pages in */ +static int lru_gen_last_gen = -1; + struct test_params { /* The backing source for the region of memory. 
*/ enum vm_mem_backing_src_type backing_src; @@ -123,8 +154,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) "Set page_idle bits for PFN 0x%" PRIx64, pfn); } -static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct memstress_vcpu_args *vcpu_args) +static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx) +{ + char prefix[18] = {}; + + if (vcpu_idx >= 0) + snprintf(prefix, 18, "vCPU%d: ", vcpu_idx); + + TEST_ASSERT(idle_pages_warn_only, + "%sToo many pages still idle (%lu out of %lu)", + prefix, idle_pages, total_pages); + + printf("WARNING: %sToo many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", + prefix, idle_pages, total_pages); +} + +static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t base_gva = vcpu_args->gva; @@ -177,27 +224,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * arbitrary; high enough that we ensure most memory access went through * access tracking but low enough as to not make the test too brittle * over time and across architectures. - * - * When running the guest as a nested VM, "warn" instead of asserting - * as the TLB size is effectively unlimited and the KVM doesn't - * explicitly flush the TLB when aging SPTEs. As a result, more pages - * are cached and the guest won't see the "idle" bit cleared. */ - if (still_idle >= pages / 10) { -#ifdef __x86_64__ - TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), - "vCPU%d: Too many pages still idle (%lu out of %lu)", - vcpu_idx, still_idle, pages); -#endif - printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " - "this will affect performance results.\n", - vcpu_idx, still_idle, pages); - } + if (still_idle >= pages / 10) + too_many_idle_pages(still_idle, pages, + overlap_memory_access ? 
-1 : vcpu_idx); close(page_idle_fd); close(pagemap_fd); } +int find_generation(struct memcg_stats *stats, long total_pages) +{ + /* + * For finding the generation that contains our pages, use the same + * 90% threshold that page_idle uses. + */ + int gen = lru_gen_find_generation(stats, total_pages * 9 / 10); + + if (gen >= 0) + return gen; + + if (!idle_pages_warn_only) { + TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).", + total_pages * 9 / 10); + return gen; + } + + /* + * We couldn't find a generation with 90% of guest memory, which can + * happen if access tracking is unreliable. Simply look for a majority + * of pages. + */ + puts("WARNING: Couldn't find a generation with 90% of guest memory. " + "Performance results may not be accurate."); + gen = lru_gen_find_generation(stats, total_pages / 2); + TEST_ASSERT(gen >= 0, + "Could not find a generation with 50%% of guest memory (%ld pages).", + total_pages / 2); + return gen; +} + +static void lru_gen_mark_memory_idle(struct kvm_vm *vm) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + struct memcg_stats stats; + int new_gen; + + /* Make a new generation */ + clock_gettime(CLOCK_MONOTONIC, &ts_start); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + ts_elapsed = timespec_elapsed(ts_start); + + /* Check the generation again */ + new_gen = find_generation(&stats, test_pages); + + /* + * This function should only be invoked with newly-accessed pages, + * so pages should always move to a newer generation. + */ + if (new_gen <= lru_gen_last_gen) { + /* We did not move to a newer generation. 
*/ + long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen, + &stats); + + too_many_idle_pages(min_t(long, idle_pages, test_pages), + test_pages, -1); + } + pr_info("%-30s: %ld.%09lds\n", + "Mark memory idle (lru_gen)", ts_elapsed.tv_sec, + ts_elapsed.tv_nsec); + lru_gen_last_gen = new_gen; +} + static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall) { struct ucall uc; @@ -237,9 +336,9 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) assert_ucall(vcpu, UCALL_SYNC); break; case ITERATION_MARK_IDLE: - mark_vcpu_memory_idle(vm, vcpu_args); + pageidle_mark_vcpu_memory_idle(vm, vcpu_args); break; - }; + } vcpu_last_completed_iteration[vcpu_idx] = current_iteration; } @@ -289,15 +388,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus, static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus) { + if (use_lru_gen) + return lru_gen_mark_memory_idle(vm); + /* * Even though this parallelizes the work across vCPUs, this is still a * very slow operation because page_idle forces the test to mark one pfn - * at a time and the clear_young notifier serializes on the KVM MMU + * at a time and the clear_young notifier may serialize on the KVM MMU * lock. */ pr_debug("Marking VM memory idle (slow)...\n"); iteration_work = ITERATION_MARK_IDLE; - run_iteration(vm, nr_vcpus, "Mark memory idle"); + run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)"); } static void run_test(enum vm_guest_mode mode, void *arg) @@ -309,11 +411,38 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); + /* + * If guest_page_size is larger than the host's page size, the + * guest (memstress) will only fault in a subset of the host's pages. 
+ */ + test_pages = params->nr_vcpus * params->vcpu_memory_bytes / + max(memstress_args.guest_page_size, + (uint64_t)getpagesize()); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); + if (use_lru_gen) { + struct memcg_stats stats; + + /* + * Do a page table scan now. Following initial population, aging + * may not cause the pages to move to a newer generation. Do + * an aging pass now so that future aging passes always move + * pages to a newer generation. + */ + printf("Initial aging pass (lru_gen)\n"); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages, + "Not all pages accounted for (looking for %ld). " + "Was the memcg set up correctly?", test_pages); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory"); + lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME); + lru_gen_last_gen = find_generation(&stats, test_pages); + } + /* As a control, read and write to the populated memory first. */ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory"); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory"); @@ -328,6 +457,37 @@ static void run_test(enum vm_guest_mode mode, void *arg) memstress_destroy_vm(vm); } +static int access_tracking_unreliable(void) +{ +#ifdef __x86_64__ + /* + * When running nested, the TLB size may be effectively unlimited (for + * example, this is the case when running on KVM L0), and KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. + */ + if (this_cpu_has(X86_FEATURE_HYPERVISOR)) { + puts("Skipping idle page count sanity check, because the test is run nested"); + return 1; + } +#endif + /* + * When NUMA balancing is enabled, guest memory will be unmapped to get + * NUMA faults, dropping the Accessed bits. 
+ */ + if (is_numa_balancing_enabled()) { + puts("Skipping idle page count sanity check, because NUMA balancing is enabled"); + return 1; + } + return 0; +} + +static int run_test_for_each_guest_mode(const char *cgroup, void *arg) +{ + for_each_guest_mode(run_test, arg); + return 0; +} + static void help(char *name) { puts(""); @@ -342,11 +502,22 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -w: Control whether the test warns or fails if more than 10%%\n" + " of pages are still seen as idle/old after accessing guest\n" + " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n" + " mode, the test fails by default, but switches to warn only\n" + " if NUMA balancing is enabled or the test detects it's running\n" + " in a VM.\n"); backing_src_help("-s"); puts(""); exit(0); } +void destroy_cgroup(char *cg) +{ + printf("Destroying cgroup: %s\n", cg); +} + int main(int argc, char *argv[]) { struct test_params params = { @@ -354,12 +525,13 @@ int main(int argc, char *argv[]) .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, .nr_vcpus = 1, }; + char *new_cg = NULL; int page_idle_fd; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -376,6 +548,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break; + case 'w': + idle_pages_warn_only = + atoi_non_negative("Idle pages warning", + optarg); + break; case 'h': default: help(argv[0]); @@ -383,12 +560,50 @@ int main(int argc, char *argv[]) } } - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "CONFIG_IDLE_PAGE_TRACKING is not enabled"); - close(page_idle_fd); + if 
(idle_pages_warn_only == -1) + idle_pages_warn_only = access_tracking_unreliable(); + + if (lru_gen_usable()) { + bool cg_created = true; + int ret; - for_each_guest_mode(run_test, &params); + puts("Using lru_gen for aging"); + use_lru_gen = true; + + if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory")) + ksft_exit_skip("Cannot find memory cgroup controller\n"); + + new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME); + printf("Creating cgroup: %s\n", new_cg); + if (cg_create(new_cg)) { + if (errno == EEXIST) { + printf("Found existing cgroup"); + cg_created = false; + } else { + ksft_exit_skip("could not create new cgroup: %s\n", new_cg); + } + } + + /* + * This will fork off a new process to run the test within + * a new memcg, so we need to properly propagate the return + * value up. + */ + ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params); + if (cg_created) + cg_destroy(new_cg); + if (ret < 0) + TEST_FAIL("child did not spawn or was abnormally killed"); + if (ret) + return ret; + } else { + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); + close(page_idle_fd); + + puts("Using page_idle for aging"); + run_test_for_each_guest_mode(NULL, &params); + } return 0; } diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index acb2cb596332..cf8fb67104f1 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - cpu_set_t cpuset; uint32_t new_pcpu = test_get_pcpu(); - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); /* Allow the error where the vCPU thread is already finished 
*/ TEST_ASSERT(ret == 0 || ret == ESRCH, diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index 8e5bd07a3727..713005b6f508 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -97,7 +97,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_wi_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); /* @@ -106,7 +106,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) */ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -126,14 +126,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_invariant_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg_id, &val); + val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); } } @@ -144,10 +144,10 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { uint64_t val, el0; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); - return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); + return el0 == ID_AA64PFR0_EL1_EL0_IMP; } int main(void) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index eeba1cc87ff8..d592a4515399 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -165,10 +165,8 @@ static void 
guest_code(void) static void test_init_timer_irq(struct kvm_vm *vm) { /* Timer intid should be same for all the vCPUs, so query only vCPU-0 */ - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]); + vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } -static int gic_fd; - struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; int nr_vcpus = test_args.nr_vcpus; + TEST_REQUIRE(kvm_supports_vgic_v3()); + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); vm_init_descriptor_tables(vm); @@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void) vcpu_init_descriptor_tables(vcpus[i]); test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); @@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void) void test_vm_cleanup(struct kvm_vm *vm) { - close(gic_fd); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c new file mode 100644 index 000000000000..993c9e38e729 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -0,0 +1,1059 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. + * + * The test validates some edge cases related to the arch-timer: + * - timers above the max TVAL value. + * - timers in the past + * - moving counters ahead and behind pending timers. + * - reprogramming timers. 
+ * - timers fired multiple times. + * - masking/unmasking using the timer control mask. + * + * Copyright (c) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <sys/sysinfo.h> + +#include "arch_timer.h" +#include "gic.h" +#include "vgic.h" + +/* Depends on counter width. */ +static uint64_t CVAL_MAX; +/* tval is a signed 32-bit int. */ +static const int32_t TVAL_MAX = INT32_MAX; +static const int32_t TVAL_MIN = INT32_MIN; + +/* After how much time we say there is no IRQ. */ +static const uint32_t TIMEOUT_NO_IRQ_US = 50000; + +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static uint64_t DEF_CNT; + +/* Number of runs. */ +static const uint32_t NR_TEST_ITERS_DEF = 5; + +/* Default wait test time in ms. */ +static const uint32_t WAIT_TEST_MS = 10; + +/* Default "long" wait test time in ms. */ +static const uint32_t LONG_WAIT_TEST_MS = 100; + +/* Shared with IRQ handler. */ +struct test_vcpu_shared_data { + atomic_t handled; + atomic_t spurious; +} shared_data; + +struct test_args { + /* Virtual or physical timer and counter tests. */ + enum arch_timer timer; + /* Delay used for most timer tests. */ + uint64_t wait_ms; + /* Delay used in the test_long_timer_delays test. */ + uint64_t long_wait_ms; + /* Number of iterations. */ + int iterations; + /* Whether to test the physical timer. */ + bool test_physical; + /* Whether to test the virtual timer. 
*/ + bool test_virtual; +}; + +struct test_args test_args = { + .wait_ms = WAIT_TEST_MS, + .long_wait_ms = LONG_WAIT_TEST_MS, + .iterations = NR_TEST_ITERS_DEF, + .test_physical = true, + .test_virtual = true, +}; + +static int vtimer_irq, ptimer_irq; + +enum sync_cmd { + SET_COUNTER_VALUE, + USERSPACE_USLEEP, + USERSPACE_SCHED_YIELD, + USERSPACE_MIGRATE_SELF, + NO_USERSPACE_CMD, +}; + +typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); + +static void sleep_poll(enum arch_timer timer, uint64_t usec); +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); +static void sleep_migrate(enum arch_timer timer, uint64_t usec); + +sleep_method_t sleep_method[] = { + sleep_poll, + sleep_sched_poll, + sleep_migrate, + sleep_in_userspace, +}; + +typedef void (*irq_wait_method_t)(void); + +static void wait_for_non_spurious_irq(void); +static void wait_poll_for_irq(void); +static void wait_sched_poll_for_irq(void); +static void wait_migrate_poll_for_irq(void); + +irq_wait_method_t irq_wait_method[] = { + wait_for_non_spurious_irq, + wait_poll_for_irq, + wait_sched_poll_for_irq, + wait_migrate_poll_for_irq, +}; + +enum timer_view { + TIMER_CVAL, + TIMER_TVAL, +}; + +static void assert_irqs_handled(uint32_t n) +{ + int h = atomic_read(&shared_data.handled); + + __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); +} + +static void userspace_cmd(uint64_t cmd) +{ + GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); +} + +static void userspace_migrate_vcpu(void) +{ + userspace_cmd(USERSPACE_MIGRATE_SELF); +} + +static void userspace_sleep(uint64_t usecs) +{ + GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); +} + +static void set_counter(enum arch_timer timer, uint64_t counter) +{ + GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + enum arch_timer timer; + uint64_t cnt, 
cval; + uint32_t ctl; + bool timer_condition, istatus; + + if (intid == IAR_SPURIOUS) { + atomic_inc(&shared_data.spurious); + goto out; + } + + if (intid == ptimer_irq) + timer = PHYSICAL; + else if (intid == vtimer_irq) + timer = VIRTUAL; + else + goto out; + + ctl = timer_get_ctl(timer); + cval = timer_get_cval(timer); + cnt = timer_get_cntct(timer); + timer_condition = cnt >= cval; + istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); + GUEST_ASSERT_EQ(timer_condition, istatus); + + /* Disable and mask the timer. */ + timer_set_ctl(timer, CTL_IMASK); + + atomic_inc(&shared_data.handled); + +out: + gic_set_eoi(intid); +} + +static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_cval(timer, cval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, + enum timer_view tv) +{ + switch (tv) { + case TIMER_CVAL: + set_cval_irq(timer, xval, ctl); + break; + case TIMER_TVAL: + set_tval_irq(timer, xval, ctl); + break; + default: + GUEST_FAIL("Could not get timer %d", timer); + } +} + +/* + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void wait_for_non_spurious_irq(void) +{ + int h; + + local_irq_disable(); + + for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) { + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); + } +} + +/* + * Wait for an non-spurious IRQ by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD). 
+ * + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd) +{ + int h; + + local_irq_disable(); + + h = atomic_read(&shared_data.handled); + + local_irq_enable(); + while (h == atomic_read(&shared_data.handled)) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } + local_irq_disable(); +} + +static void wait_poll_for_irq(void) +{ + poll_for_non_spurious_irq(NO_USERSPACE_CMD); +} + +static void wait_sched_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD); +} + +static void wait_migrate_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF); +} + +/* + * Sleep for usec microseconds by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD). + */ +static void guest_poll(enum arch_timer test_timer, uint64_t usec, + enum sync_cmd usp_cmd) +{ + uint64_t cycles = usec_to_cycles(usec); + /* Whichever timer we are testing with, sleep with the other. 
*/ + enum arch_timer sleep_timer = 1 - test_timer; + uint64_t start = timer_get_cntct(sleep_timer); + + while ((timer_get_cntct(sleep_timer) - start) < cycles) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } +} + +static void sleep_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, NO_USERSPACE_CMD); +} + +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_SCHED_YIELD); +} + +static void sleep_migrate(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); +} + +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +{ + userspace_sleep(usec); +} + +/* + * Reset the timer state to some nice values like the counter not being close + * to the edge, and the control register masked and disabled. + */ +static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +{ + set_counter(timer, cnt); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timer_xval(enum arch_timer timer, uint64_t xval, + enum timer_view tv, irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + local_irq_disable(); + + if (reset_state) + reset_timer_state(timer, reset_cnt); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * The test_timer_* functions will program the timer, wait for it, and assert + * the firing of the correct IRQ. + * + * These functions don't have a timeout and return as soon as they receive an + * IRQ. They can hang (forever), so we rely on having a timeout mechanism in + * the "runner", like: tools/testing/selftests/kselftest/runner.sh. 
+ */ + +static void test_timer_cval(enum arch_timer timer, uint64_t cval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); +} + +static void test_timer_tval(enum arch_timer timer, int32_t tval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + reset_cnt); +} + +static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, + uint64_t usec, enum timer_view timer_view, + sleep_method_t guest_sleep) +{ + local_irq_disable(); + + set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); + guest_sleep(timer, usec); + + local_irq_enable(); + isb(); + + /* Assume success (no IRQ) after waiting usec microseconds */ + assert_irqs_handled(0); +} + +static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, + uint64_t usec, sleep_method_t wm) +{ + test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); +} + +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, + sleep_method_t wm) +{ + /* tval will be cast to an int32_t in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); +} + +/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ +static void test_timer_control_mask_then_unmask(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Unmask the timer, and then get an IRQ. */ + local_irq_disable(); + timer_set_ctl(timer, CTL_ENABLE); + /* This method re-enables IRQs to handle the one we're looking for. */ + wait_for_non_spurious_irq(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Check that timer control masks actually mask a timer being fired. */ +static void test_timer_control_masks(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + + /* Local IRQs are not masked at this point. 
*/ + + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + sleep_poll(timer, TIMEOUT_NO_IRQ_US); + + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_fire_a_timer_multiple_times(enum arch_timer timer, + irq_wait_method_t wm, int num) +{ + int i; + + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_tval_irq(timer, 0, CTL_ENABLE); + + for (i = 1; i <= num; i++) { + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ handler masked and disabled the timer. + * Enable and unmask it again. + */ + timer_set_ctl(timer, CTL_ENABLE); + + assert_irqs_handled(i); + } + + local_irq_enable(); +} + +static void test_timers_fired_multiple_times(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) + test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10); +} + +/* + * Set a timer for tval=delta_1_ms then reprogram it to + * tval=delta_2_ms. Check that we get the timer fired. There is no + * timeout for the wait: we use the wfi instruction. + */ +static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, + int32_t delta_1_ms, int32_t delta_2_ms) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + /* Program the timer to DEF_CNT + delta_1_ms. */ + set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE); + + /* Reprogram the timer to DEF_CNT + delta_2_ms. */ + timer_set_tval(timer, msec_to_cycles(delta_2_ms)); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). 
*/ + GUEST_ASSERT(timer_get_cntct(timer) >= + DEF_CNT + msec_to_cycles(delta_2_ms)); + + local_irq_enable(); + assert_irqs_handled(1); +}; + +static void test_reprogram_timers(enum arch_timer timer) +{ + int i; + uint64_t base_wait = test_args.wait_ms; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* + * Ensure reprogramming works whether going from a + * longer time to a shorter or vice versa. + */ + test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait, + base_wait); + test_reprogramming_timer(timer, irq_wait_method[i], base_wait, + 2 * base_wait); + } +} + +static void test_basic_functionality(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +/* + * This test checks basic timer behavior without actually firing timers, things + * like: the relationship between cval and tval, tval down-counting. + */ +static void timers_sanity_checks(enum arch_timer timer, bool use_sched) +{ + reset_timer_state(timer, DEF_CNT); + + local_irq_disable(); + + /* cval in the past */ + timer_set_cval(timer, + timer_get_cntct(timer) - + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + /* tval in the past */ + timer_set_tval(timer, -1); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer)); + + /* tval larger than TVAL_MAX. 
This requires programming with + * timer_set_cval instead so the value is expressible + */ + timer_set_cval(timer, + timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= 0); + + /* + * tval larger than 2 * TVAL_MAX. + * Twice the TVAL_MAX completely loops around the TVAL. + */ + timer_set_cval(timer, + timer_get_cntct(timer) + 2ULL * TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= + msec_to_cycles(test_args.wait_ms)); + + /* negative tval that rollovers from 0. */ + set_counter(timer, msec_to_cycles(1)); + timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); + + /* tval should keep down-counting from 0 to -1. */ + timer_set_tval(timer, 0); + sleep_poll(timer, 1); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + local_irq_enable(); + + /* Mask and disable any pending timer. */ + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timers_sanity_checks(enum arch_timer timer) +{ + timers_sanity_checks(timer, false); + /* Check how KVM saves/restores these edge-case values. */ + timers_sanity_checks(timer, true); +} + +static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_cval_irq(timer, + (uint64_t) TVAL_MAX + + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); + + set_counter(timer, TVAL_MAX); + + /* This method re-enables IRQs to handle the one we're looking for. 
*/ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ +static void test_timers_above_tval_max(enum arch_timer timer) +{ + uint64_t cval; + int i; + + /* + * Test that the system is not implementing cval in terms of + * tval. If that was the case, setting a cval to "cval = now + * + TVAL_MAX + wait_ms" would wrap to "cval = now + + * wait_ms", and the timer would fire immediately. Test that it + * doesn't. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + reset_timer_state(timer, DEF_CNT); + cval = timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms); + test_cval_no_irq(timer, cval, + msecs_to_usecs(test_args.wait_ms) + + TIMEOUT_NO_IRQ_US, sleep_method[i]); + } + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* Get the IRQ by moving the counter forward. */ + test_set_cnt_after_tval_max(timer, irq_wait_method[i]); + } +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, + uint64_t xval, uint64_t cnt_2, + irq_wait_method_t wm, enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. 
+ */ +static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t xval, + uint64_t cnt_2, + sleep_method_t guest_sleep, + enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + guest_sleep(timer, TIMEOUT_NO_IRQ_US); + + local_irq_enable(); + isb(); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, + int32_t tval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); +} + +static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, + uint64_t cval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); +} + +static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, + uint64_t cnt_1, int32_t tval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, + TIMER_TVAL); +} + +static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t cval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, + TIMER_CVAL); +} + +/* Set a timer and then move the counter ahead of it. */ +static void test_move_counters_ahead_of_timers(enum arch_timer timer) +{ + int i; + int32_t tval; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); + test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); + + /* Move counter ahead of negative tval. 
*/ + test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); + test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); + tval = TVAL_MAX; + test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, + wm); + } +} + +/* + * Program a timer, mask it, and then change the tval or counter to cancel it. + * Unmask it and check that nothing fires. + */ +static void test_move_counters_behind_timers(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, + sm); + test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); + } +} + +static void test_timers_in_the_past(enum arch_timer timer) +{ + int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval; + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + /* set a timer wait_ms in the past. */ + cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + + /* Set a timer to counter=0 (in the past) */ + test_timer_cval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer for tval=0 (now) */ + test_timer_tval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer to as far in the past as possible */ + test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); + } + + /* + * Set the counter to wait_ms, and a tval to -wait_ms. There should be no + * IRQ as that tval means cval=CVAL_MAX-wait_ms. 
+ */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + set_counter(timer, msec_to_cycles(test_args.wait_ms)); + test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); + } +} + +static void test_long_timer_delays(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +static void guest_run_iteration(enum arch_timer timer) +{ + test_basic_functionality(timer); + test_timers_sanity_checks(timer); + + test_timers_above_tval_max(timer); + test_timers_in_the_past(timer); + + test_move_counters_ahead_of_timers(timer); + test_move_counters_behind_timers(timer); + test_reprogram_timers(timer); + + test_timers_fired_multiple_times(timer); + + test_timer_control_mask_then_unmask(timer); + test_timer_control_masks(timer); +} + +static void guest_code(enum arch_timer timer) +{ + int i; + + local_irq_disable(); + + gic_init(GIC_V3, 1); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + for (i = 0; i < test_args.iterations; i++) { + GUEST_SYNC(i); + guest_run_iteration(timer); + } + + test_long_timer_delays(timer); + GUEST_DONE(); +} + +static cpu_set_t default_cpuset; + +static uint32_t next_pcpu(void) +{ + uint32_t max = get_nprocs(); + uint32_t cur = sched_getcpu(); + uint32_t next = cur; + cpu_set_t cpuset = default_cpuset; + + TEST_ASSERT(max > 1, "Need at least two physical cpus"); + + do { + next = (next + 1) % CPU_SETSIZE; + } while (!CPU_ISSET(next, &cpuset)); + + return next; +} + +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, + enum arch_timer timer) +{ + if (timer == 
PHYSICAL) + vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); + else + vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + enum sync_cmd cmd = uc->args[1]; + uint64_t val = uc->args[2]; + enum arch_timer timer = uc->args[3]; + + switch (cmd) { + case SET_COUNTER_VALUE: + kvm_set_cntxct(vcpu, val, timer); + break; + case USERSPACE_USLEEP: + usleep(val); + break; + case USERSPACE_SCHED_YIELD: + sched_yield(); + break; + case USERSPACE_MIGRATE_SELF: + pin_self_to_cpu(next_pcpu()); + break; + default: + break; + } +} + +static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + /* Start on CPU 0 */ + pin_self_to_cpu(0); + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(vcpu, &uc); + break; + case UCALL_DONE: + goto out; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto out; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + out: + return; +} + +static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + ptimer_irq = vcpu_get_ptimer_irq(vcpu); + vtimer_irq = vcpu_get_vtimer_irq(vcpu); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, + enum arch_timer timer) +{ + *vm = vm_create_with_one_vcpu(vcpu, guest_code); + TEST_ASSERT(*vm, "Failed to create the test VM\n"); + + vm_init_descriptor_tables(*vm); + vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, + guest_irq_handler); + + vcpu_init_descriptor_tables(*vcpu); + vcpu_args_set(*vcpu, 1, timer); + + test_init_timer_irq(*vm, *vcpu); + + sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); +} + +static void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} + +static void 
test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" + , name); + pr_info("\t-i: Number of iterations (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); + pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", + LONG_WAIT_TEST_MS); + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", + WAIT_TEST_MS); + pr_info("\t-p: Test physical timer (default: true)\n"); + pr_info("\t-v: Test virtual timer (default: true)\n"); + pr_info("\t-h: Print this help message\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { + switch (opt) { + case 'b': + test_args.test_physical = true; + test_args.test_virtual = true; + break; + case 'i': + test_args.iterations = + atoi_positive("Number of iterations", optarg); + break; + case 'l': + test_args.long_wait_ms = + atoi_positive("Long wait time", optarg); + break; + case 'p': + test_args.test_physical = true; + test_args.test_virtual = false; + break; + case 'v': + test_args.test_virtual = true; + test_args.test_physical = false; + break; + case 'w': + test_args.wait_ms = atoi_positive("Wait time", optarg); + break; + case 'h': + default: + goto err; + } + } + + return true; + + err: + test_print_help(argv[0]); + return false; +} + +static void set_counter_defaults(void) +{ + const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + uint64_t freq = read_sysreg(CNTFRQ_EL0); + int width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_supports_vgic_v3()); + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + sched_getaffinity(0, 
sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); + + if (test_args.test_virtual) { + test_vm_create(&vm, &vcpu, VIRTUAL); + test_run(vm, vcpu); + test_vm_cleanup(vm); + } + + if (test_args.test_physical) { + test_vm_create(&vm, &vcpu, PHYSICAL); + test_run(vm, vcpu); + test_vm_cleanup(vm); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c new file mode 100644 index 000000000000..c8ee6f520734 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/at.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes. + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +#define TEST_ADDR 0x80000000 + +enum { + CLEAR_ACCESS_FLAG, + TEST_ACCESS_FLAG, +}; + +static u64 *ptep_hva; + +#define copy_el2_to_el1(reg) \ + write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12) + +/* Yes, this is an ugly hack */ +#define __at(op, addr) write_sysreg_s(addr, op) + +#define test_at_insn(op, expect_fault) \ +do { \ + u64 par, fsc; \ + bool fault; \ + \ + GUEST_SYNC(CLEAR_ACCESS_FLAG); \ + \ + __at(OP_AT_##op, TEST_ADDR); \ + isb(); \ + par = read_sysreg(par_el1); \ + \ + fault = par & SYS_PAR_EL1_F; \ + fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \ + \ + __GUEST_ASSERT((expect_fault) == fault, \ + "AT "#op": %sexpected fault (par: %lx)1", \ + (expect_fault) ? 
"" : "un", par); \ + if ((expect_fault)) { \ + __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \ + "AT "#op": expected access flag fault (par: %lx)", \ + par); \ + } else { \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \ + GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \ + GUEST_SYNC(TEST_ACCESS_FLAG); \ + } \ +} while (0) + +static void test_at(bool expect_fault) +{ + test_at_insn(S1E2R, expect_fault); + test_at_insn(S1E2W, expect_fault); + + /* Reuse the stage-1 MMU context from EL2 at EL1 */ + copy_el2_to_el1(SCTLR); + copy_el2_to_el1(MAIR); + copy_el2_to_el1(TCR); + copy_el2_to_el1(TTBR0); + copy_el2_to_el1(TTBR1); + + /* Disable stage-2 translation and enter a non-host context */ + write_sysreg(0, vtcr_el2); + write_sysreg(0, vttbr_el2); + sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0); + isb(); + + test_at_insn(S1E1R, expect_fault); + test_at_insn(S1E1W, expect_fault); +} + +static void guest_code(void) +{ + sysreg_clear_set(tcr_el1, TCR_HA, 0); + isb(); + + test_at(true); + + if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1))) + GUEST_DONE(); + + /* + * KVM's software PTW makes the implementation choice that the AT + * instruction sets the access flag. + */ + sysreg_clear_set(tcr_el1, 0, TCR_HA); + isb(); + test_at(false); + + GUEST_DONE(); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + switch (uc->args[1]) { + case CLEAR_ACCESS_FLAG: + /* + * Delete + reinstall the memslot to invalidate stage-2 + * mappings of the stage-1 page tables, forcing KVM to + * use the 'slow' AT emulation path. + * + * This and clearing the access flag from host userspace + * ensures that the access flag cannot be set speculatively + * and is reliably cleared at the time of the AT instruction. 
+ */ + clear_bit(__ffs(PTE_AF), ptep_hva); + vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]); + break; + case TEST_ACCESS_FLAG: + TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva), + "Expected access flag to be set (desc: %lu)", *ptep_hva); + break; + default: + TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]); + } +} + +static void run_test(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + return; + case UCALL_SYNC: + handle_sync(vcpu, &uc); + continue; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + virt_map(vm, TEST_ADDR, TEST_ADDR, 1); + ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3); + run_test(vcpu); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index 2582c49e525a..1d431de8729c 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); + brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); + wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -140,7 
+140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt) static int debug_version(uint64_t id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); + return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) @@ -433,15 +433,15 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_BRK_INS, guest_sw_bp_handler); + ESR_ELx_EC_BRK64, guest_sw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_WP_CURRENT, guest_wp_handler); + ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SSTEP_CURRENT, guest_ss_handler); + ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SVC64, guest_svc_handler); + ESR_ELx_EC_SVC64, guest_svc_handler); /* Specify bpn/wpn/ctx_bpn to be tested */ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); @@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); /* Check if the current pc is expected. 
*/ - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); TEST_ASSERT(!test_pc || pc == test_pc, "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); @@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0) int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; + brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; + wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); @@ -583,7 +583,7 @@ int main(int argc, char *argv[]) uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..d8fe17a6cc59 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs 
*regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void 
vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. 
+ */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_sea_s1ptw_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, 
expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); + + GUEST_DONE(); +} + +static noinline void test_s1ptw_abort_guest(void) +{ + extern char test_s1ptw_abort_insn; + + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); + + asm volatile("test_s1ptw_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("Load on S1PTW abort should not retire"); +} + +static void test_s1ptw_abort(void) +{ + struct kvm_vcpu *vcpu; + u64 *ptep, bad_pa; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, + expect_sea_s1ptw_handler); + + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); + bad_pa = BIT(vm->pa_bits) - vm->page_size; + + *ptep &= ~GENMASK(47, 12); + *ptep |= bad_pa; + + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. 
+ */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). + */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_amo_guest(void) +{ + /* + * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked) + * since the write is redirected to memory. But don't write (intentionally) + * broken code! + */ + sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0); + isb(); + + GUEST_SYNC(0); + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + /* + * KVM treats the effective value of AMO as 1 when + * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when + * unmasked. 
+ */ + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror_amo(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); + test_s1ptw_abort(); + + if (!test_supports_el2()) + return 0; + + test_serror_amo(); +} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 709d7d721760..0a3a94c4cca1 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -15,6 +15,12 @@ #include "test_util.h" #include "processor.h" +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -22,25 +28,48 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, - 1 +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v + +#define REG_FEAT(r, id, f, v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } + 
+static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), + REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), + REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), }; bool filter_reg(__u64 reg) @@ -320,9 +349,21 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - 
ARM64_SYS_REG(3, 3, 14, 0, 2), + KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ + + /* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ + KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0), + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_TIMER_CNT, + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ @@ -456,6 +497,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ @@ -468,6 +510,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ @@ -475,6 +518,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ @@ -671,6 +715,67 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + SYS_REG(SCTLR_EL2), + 
SYS_REG(ACTLR_EL2), + SYS_REG(SCTLR2_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), +}; + +static __u64 el2_e2h0_regs[] = { + /* Empty */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -697,6 +802,23 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } +#define EL2_E2H0_SUBLIST \ + EL2_SUBLIST, \ + { \ + .name = "EL2 E2H0", \ + .capability = KVM_CAP_ARM_EL2_E2H0, \ + .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \ + .regs = el2_e2h0_regs, \ + .regs_n = ARRAY_SIZE(el2_e2h0_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -746,6 +868,124 @@ static struct vcpu_reg_list pauth_pmu_config = { }, }; +static 
struct vcpu_reg_list el2_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -753,5 +993,19 @@ struct vcpu_reg_list *vcpu_configs[] = { &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + 
&el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, + + &el2_e2h0_vregs_config, + &el2_e2h0_vregs_pmu_config, + &el2_e2h0_sve_config, + &el2_e2h0_sve_pmu_config, + &el2_e2h0_pauth_config, + &el2_e2h0_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c new file mode 100644 index 000000000000..bbe6862c6ab1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/hello_el2.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1 + * + * Copyright 2025 Google LLC + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +static void guest_code(void) +{ + u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1); + u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1); + u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4); + + GUEST_ASSERT_EQ(get_current_el(), 2); + GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H); + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1), + ID_AA64MMFR1_EL1_VH_IMP); + + /* + * Traps of the complete ID register space are IMPDEF without FEAT_FGT, + * which is really annoying to deal with in KVM describing E2H as RES1. + * + * If the implementation doesn't honor the trap then expect the register + * to return all zeros. 
+ */ + if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP) + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0), + ID_AA64MMFR0_EL1_FGT_NI); + else + GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1); + + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c new file mode 100644 index 000000000000..3826772fd470 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/host_sve.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls. 
+ * + * Copyright 2025 Arm, Ltd + */ + +#include <errno.h> +#include <signal.h> +#include <sys/auxv.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "ucall_common.h" + +static void guest_code(void) +{ + for (int i = 0; i < 10; i++) { + GUEST_UCALL_NONE(); + } + + GUEST_DONE(); +} + +void handle_sigill(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *uctx = ctx; + + printf(" < host signal %d >\n", sig); + + /* + * Skip the UDF + */ + uctx->uc_mcontext.pc += 4; +} + +void register_sigill_handler(void) +{ + struct sigaction sa = { + .sa_sigaction = handle_sigill, + .sa_flags = SA_SIGINFO, + }; + sigaction(SIGILL, &sa, NULL); +} + +static void do_sve_roundtrip(void) +{ + unsigned long before, after; + + /* + * Set all bits in a predicate register, force a save/restore via a + * SIGILL (which handle_sigill() will recover from), then report + * whether the value has changed. + */ + asm volatile( + " .arch_extension sve\n" + " ptrue p0.B\n" + " cntp %[before], p0, p0.B\n" + " udf #0\n" + " cntp %[after], p0, p0.B\n" + : [before] "=r" (before), + [after] "=r" (after) + : + : "p0" + ); + + if (before != after) { + TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n", + before, after); + } else { + printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n", + before, after); + } +} + +static void test_run(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + register_sigill_handler(); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + do_sve_roundtrip(); + + while (!guest_done) { + + printf("Running VCPU...\n"); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_NONE: + do_sve_roundtrip(); + do_sve_roundtrip(); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + /* + * This is testing the host 
environment, we don't care about + * guest SVE support. + */ + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + printf("SVE not supported\n"); + return KSFT_SKIP; + } + + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index 9d192ce0078d..bf038a0371f4 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -21,22 +21,31 @@ #define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 #define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 #define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1 + +#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX) +#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0 struct kvm_fw_reg_info { uint64_t reg; /* Register definition */ uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ + uint64_t reset_val; /* Reset value for the register */ }; #define FW_REG_INFO(r) \ { \ .reg = r, \ .max_feat_bit = r##_BIT_MAX, \ + .reset_val = r##_RESET_VAL \ } static const struct kvm_fw_reg_info fw_reg_info[] = { FW_REG_INFO(KVM_REG_ARM_STD_BMAP), FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), + FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2), }; enum test_stage { @@ -99,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { memset(&res, 0, sizeof(res)); - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: @@ -171,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) for (i = 0; i 
< ARRAY_SIZE(fw_reg_info); i++) { const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + uint64_t set_val; + + /* First 'read' should be the reset value for the reg */ + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == reg_info->reset_val, + "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", + reg_info->reg, reg_info->reset_val, val); - /* First 'read' should be an upper limit of the features supported */ - vcpu_get_reg(vcpu, reg_info->reg, &val); - TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", - reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); + if (reg_info->reset_val) + set_val = 0; + else + set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit); - /* Test a 'write' by disabling all the features of the register map */ - ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val); TEST_ASSERT(ret == 0, + "Failed to %s all the features of reg: 0x%lx; ret: %d", + (set_val ? "set" : "clear"), reg_info->reg, errno); + + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == set_val, + "Expected all the features to be %s for reg: 0x%lx", + (set_val ? "set" : "cleared"), reg_info->reg); + + /* + * If the reg has been set, clear it as test_fw_regs_after_vm_start() + * expects it to be cleared. + */ + if (set_val) { + ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + TEST_ASSERT(ret == 0, "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); - - vcpu_get_reg(vcpu, reg_info->reg, &val); - TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); + } /* * Test enabling a feature that's not supported. @@ -214,7 +240,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) * Before starting the VM, the test clears all the bits. * Check if that's still the case. 
*/ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..b5be9133535a --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include <errno.h> +#include <linux/arm-smccc.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case 
UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c new file mode 100644 index 000000000000..152c34776981 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3 system, not configuring GICv3 correctly +// results in all of the sysregs generating an UNDEF exception. + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while(0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while(0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while(0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while(0) + +static void guest_code(void) +{ + uint64_t val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC, + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existence, as all we want to see is a failure. 
+ */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! 
*/ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_gicv3(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GICv3 */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t pfr0; + + test_disable_default_vgic(); + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), + "GICv3 not supported."); + kvm_vm_free(vm); + + test_guest_no_gicv3(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index d29b08198b42..4ccbd389d133 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -95,14 +95,14 @@ static bool guest_check_lse(void) uint64_t isar0 = read_sysreg(id_aa64isar0_el1); uint64_t atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); + atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); + uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -195,11 +195,11 @@ 
static bool guest_set_ha(void) uint64_t hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); + hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); @@ -544,9 +544,9 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_DABT, no_dabt_handler); + ESR_ELx_EC_DABT_CUR, no_dabt_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_IABT, no_iabt_handler); + ESR_ELx_EC_IABT_CUR, no_iabt_handler); } static void setup_gva_maps(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index 61731a950def..98e49f710aef 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 0, 0, 0, 0, &res); return res.a0; @@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 0, 0, 0, 0, 0, &res); return res.a0; @@ -48,17 +48,26 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 0, 0, 0, 0, 0, &res); return res.a0; } +static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +{ + 
struct arm_smccc_res res; + + do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -80,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, vm = vm_create(2); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *source = aarch64_vcpu_add(vm, 0, &init, guest_code); *target = aarch64_vcpu_add(vm, 1, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -102,8 +112,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { uint64_t obs_pc, obs_x0; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, "unexpected target cpu pc: %lx (expected: %lx)", @@ -143,7 +153,7 @@ static void host_test_cpu_on(void) */ vcpu_power_off(target); - vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); enter_guest(source); @@ -188,11 +198,94 @@ static void host_test_system_suspend(void) kvm_vm_free(vm); } +static void guest_test_system_off2(void) +{ + uint64_t ret; + + /* assert that SYSTEM_OFF2 is discoverable */ + GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + + /* With non-zero 'cookie' field, it should fail */ + ret = 
psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + /* + * This would normally never return, so KVM sets the return value + * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so + * that it can test both values for HIBERNATE_OFF. + */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + /* + * Revision F.b of the PSCI v1.3 specification documents zero as an + * alias for HIBERNATE_OFF, since that's the value used in earlier + * revisions of the spec and some implementations in the field. + */ + ret = psci_system_off2(0, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + ret = psci_system_off2(0, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + GUEST_DONE(); +} + +static void host_test_system_off2(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_mp_state mps; + uint64_t psci_version = 0; + int nr_shutdowns = 0; + struct kvm_run *run; + struct ucall uc; + + setup_vm(guest_test_system_off2, &source, &target); + + psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); + + TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), + "Unexpected PSCI version %lu.%lu", + PSCI_VERSION_MAJOR(psci_version), + PSCI_VERSION_MINOR(psci_version)); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); + TEST_ASSERT(run->system_event.ndata >= 1, + "Unexpected amount of system event data: %u (expected, >= 1)", + run->system_event.ndata); + TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, + "PSCI_OFF2 flag not set. 
Flags %llu (expected %llu)", + run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); + + nr_shutdowns++; + + /* Restart the vCPU */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_mp_state_set(source, &mps); + + enter_guest(source); + } + + TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); + TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); +} + int main(void) { TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); host_test_cpu_on(); host_test_system_suspend(); + host_test_system_off2(); return 0; } diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c new file mode 100644 index 000000000000..573dd790aeb8 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails + * to handle SEA and userspace has opted in to KVM_CAP_ARM_SEA_TO_USER. + * + * After reaching userspace with expected arm_sea info, also test userspace + * injecting a synchronous external data abort into the guest. + * + * This test utilizes EINJ to generate a REAL synchronous external data + * abort by consuming a recoverable uncorrectable memory error. Therefore + * the device under test must support EINJ in both firmware and host kernel, + * including the notrigger feature. Otherwise the test will be skipped. + * The under-test platform's APEI should be unable to claim SEA. Otherwise + * the test will also be skipped. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "guest_modes.h" + +#define PAGE_PRESENT (1ULL << 63) +#define PAGE_PHYSICAL 0x007fffffffffffffULL +#define PAGE_ADDR_MASK (~(0xfffULL)) + +/* Group ISV and ISS[23:14]. 
*/ +#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \ + (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \ + (ESR_ELx_SF) | (ESR_ELx_AR)) + +#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type" +#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1" +#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2" +#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags" +#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger" +#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject" +/* Memory Uncorrectable non-fatal. */ +#define ERROR_TYPE_MEMORY_UER 0x10 +/* Memory address and mask valid (param1 and param2). */ +#define MASK_MEMORY_UER 0b10 + +/* Guest virtual address region = [2G, 3G). */ +#define START_GVA 0x80000000UL +#define VM_MEM_SIZE 0x40000000UL +/* Note: EINJ_OFFSET must be < VM_MEM_SIZE. */ +#define EINJ_OFFSET 0x01234badUL +#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET)) + +static vm_paddr_t einj_gpa; +static void *einj_hva; +static uint64_t einj_hpa; +static bool far_invalid; + +static uint64_t translate_to_host_paddr(unsigned long vaddr) +{ + uint64_t pinfo; + int64_t offset = vaddr / getpagesize() * sizeof(pinfo); + int fd; + uint64_t page_addr; + uint64_t paddr; + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) + ksft_exit_fail_perror("Failed to open /proc/self/pagemap"); + if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) { + close(fd); + ksft_exit_fail_perror("Failed to read /proc/self/pagemap"); + } + + close(fd); + + if ((pinfo & PAGE_PRESENT) == 0) + ksft_exit_fail_perror("Page not present"); + + page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT; + paddr = page_addr + (vaddr & (getpagesize() - 1)); + return paddr; +} + +static void write_einj_entry(const char *einj_path, uint64_t val) +{ + char cmd[256] = {0}; + FILE *cmdfile = NULL; + + sprintf(cmd, "echo %#lx > %s", val, einj_path); + cmdfile = popen(cmd, "r"); + + if (pclose(cmdfile) == 0) + ksft_print_msg("echo %#lx > %s - done\n", val, einj_path); + else 
+ ksft_exit_fail_perror("Failed to write EINJ entry"); +} + +static void inject_uer(uint64_t paddr) +{ + if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) + ksft_test_result_skip("EINJ table no available in firmware"); + + if (access(EINJ_ETYPE, R_OK | W_OK) == -1) + ksft_test_result_skip("EINJ module probably not loaded?"); + + write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER); + write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER); + write_einj_entry(EINJ_ADDR, paddr); + write_einj_entry(EINJ_MASK, ~0x0UL); + write_einj_entry(EINJ_NOTRIGGER, 1); + write_einj_entry(EINJ_DOIT, 1); +} + +/* + * When host APEI successfully claims the SEA caused by guest_code, kernel + * will send SIGBUS signal with BUS_MCEERR_AR to test thread. + * + * We set up this SIGBUS handler to skip the test for that case. + */ +static void sigbus_signal_handler(int sig, siginfo_t *si, void *v) +{ + ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig); + ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n", + si->si_signo, si->si_errno, si->si_code, si->si_addr); + if (si->si_code == BUS_MCEERR_AR) + ksft_test_result_skip("SEA is claimed by host APEI\n"); + else + ksft_test_result_fail("Exit with signal unhandled\n"); + + exit(0); +} + +static void setup_sigbus_handler(void) +{ + struct sigaction act; + + memset(&act, 0, sizeof(act)); + sigemptyset(&act.sa_mask); + act.sa_sigaction = sigbus_signal_handler; + act.sa_flags = SA_SIGINFO; + TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0, + "Failed to setup SIGBUS handler"); +} + +static void guest_code(void) +{ + uint64_t guest_data; + + /* Consuming the error will cause a SEA. 
*/ + guest_data = *(uint64_t *)EINJ_GVA; + + GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n", + EINJ_GVA, guest_data); +} + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + u64 far = read_sysreg(far_el1); + bool expect_far_invalid = far_invalid; + + GUEST_PRINTF("Handling Guest SEA\n"); + GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + if (expect_far_invalid) { + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV); + GUEST_PRINTF("Guest observed garbage value in FAR\n"); + } else { + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0); + GUEST_ASSERT_EQ(far, EINJ_GVA); + } + + GUEST_DONE(); +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + bool guest_done = false; + struct kvm_run *run = vcpu->run; + u64 esr; + + /* Resume the vCPU after error injection to consume the error. 
*/ + vcpu_run(vcpu); + + ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n", + exit_reason_str(run->exit_reason)); + ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n", + run->arm_sea.esr, run->arm_sea.flags); + ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n", + run->arm_sea.gva, run->arm_sea.gpa); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA); + + esr = run->arm_sea.esr; + TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW); + TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0); + TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0); + TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0); + + if (!(esr & ESR_ELx_FnV)) { + ksft_print_msg("Expect gva to match given FnV bit is 0\n"); + TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA); + } + + if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) { + ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n"); + TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK); + } + + far_invalid = esr & ESR_ELx_FnV; + + /* Inject a SEA into guest and expect handled in SEA handler. */ + vcpu_inject_sea(vcpu); + + /* Expect the guest to reach GUEST_DONE gracefully. 
*/ + do { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_PRINTF: + ksft_print_msg("From guest: %s", uc.buffer); + break; + case UCALL_DONE: + ksft_print_msg("Guest done gracefully!\n"); + guest_done = 1; + break; + case UCALL_ABORT: + ksft_print_msg("Guest aborted!\n"); + guest_done = 1; + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + } + } while (!guest_done); +} + +static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) +{ + size_t backing_page_size; + size_t guest_page_size; + size_t alignment; + uint64_t num_guest_pages; + vm_paddr_t start_gpa; + enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB; + struct kvm_vm *vm; + + backing_page_size = get_backing_src_pagesz(src_type); + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + alignment = max(backing_page_size, guest_page_size); + num_guest_pages = VM_MEM_SIZE / guest_page_size; + + vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + + vm_install_sync_handler(vm, + /*vector=*/VECTOR_SYNC_CURRENT, + /*ec=*/ESR_ELx_EC_DABT_CUR, + /*handler=*/expect_sea_handler); + + start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size; + start_gpa = align_down(start_gpa, alignment); + + vm_userspace_mem_region_add( + /*vm=*/vm, + /*src_type=*/src_type, + /*guest_paddr=*/start_gpa, + /*slot=*/1, + /*npages=*/num_guest_pages, + /*flags=*/0); + + virt_map(vm, START_GVA, start_gpa, num_guest_pages); + + ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n", + num_guest_pages, START_GVA, start_gpa); + return vm; +} + +static void vm_inject_memory_uer(struct kvm_vm *vm) +{ + uint64_t guest_data; + + einj_gpa = addr_gva2gpa(vm, EINJ_GVA); + einj_hva = addr_gva2hva(vm, EINJ_GVA); + + /* Populate certain data before injecting UER. 
*/ + *(uint64_t *)einj_hva = 0xBAADCAFE; + guest_data = *(uint64_t *)einj_hva; + ksft_print_msg("Before EINJect: data=%#lx\n", + guest_data); + + einj_hpa = translate_to_host_paddr((unsigned long)einj_hva); + + ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n", + EINJ_GVA, einj_gpa, einj_hva, einj_hpa); + + inject_uer(einj_hpa); + ksft_print_msg("Memory UER EINJected\n"); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER)); + + setup_sigbus_handler(); + + vm = vm_create_with_sea_handler(&vcpu); + vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0); + vm_inject_memory_uer(vm); + run_vm(vm, vcpu); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index a7de39fa2a0a..c4815d365816 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -68,6 +68,8 @@ struct test_feature_reg { } static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP), REG_FTR_END, @@ -121,28 +123,46 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), 
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), REG_FTR_END, }; static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), REG_FTR_END, }; @@ -150,7 +170,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), REG_FTR_BITS(FTR_LOWER_SAFE, 
ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0), REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), @@ -173,6 +195,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -198,10 +228,13 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1), TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), + TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -214,11 +247,20 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); 
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_MPIDR_EL1); + GUEST_REG_SYNC(SYS_CLIDR_EL1); + GUEST_REG_SYNC(SYS_CTR_EL0); + GUEST_REG_SYNC(SYS_MIDR_EL1); + GUEST_REG_SYNC(SYS_REVIDR_EL1); + GUEST_REG_SYNC(SYS_AIDR_EL1); GUEST_DONE(); } @@ -226,7 +268,9 @@ static void guest_code(void) /* Return a safe value to a given ftr_bits an ftr value */ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) { - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { @@ -278,7 +322,9 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) /* Return an invalid value to a given ftr_bits an ftr value */ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) { - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { @@ -334,7 +380,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t mask = ftr_bits->mask; uint64_t val, new_val, ftr; - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_safe_value(ftr_bits, ftr); @@ -344,7 +390,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, val |= ftr; vcpu_set_reg(vcpu, reg, val); - vcpu_get_reg(vcpu, reg, &new_val); + new_val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(new_val, val); return new_val; @@ -358,7 +404,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, uint64_t val, old_val, ftr; int r; - vcpu_get_reg(vcpu, reg, &val); + val = 
vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; ftr = get_invalid_value(ftr_bits, ftr); @@ -372,7 +418,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT(r < 0 && errno == EINVAL, "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg, &val); + val = vcpu_get_reg(vcpu, reg); TEST_ASSERT_EQ(val, old_val); } @@ -431,6 +477,164 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) } } +#define MPAM_IDREG_TEST 6 +static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + int idx, err; + + /* + * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, + * check that if it can be set to 1, (i.e. it is supported by the + * hardware), that it can't be set to other values. + */ + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + /* Writeable? Nothing to test! */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); + if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + /* Try to set MPAM=0. This should always be possible. 
*/ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); + + /* Try to set MPAM=1 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); + + /* Try to set MPAM=2 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); + + /* And again for ID_AA64PFR1_EL1.MPAM_frac */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + + /* Try to set MPAM_frac=0. This should always be possible. 
*/ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); + + /* Try to set MPAM_frac=1 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); + + /* Try to set MPAM_frac=2 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); +} + +#define MTE_IDREG_TEST 1 +static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + uint64_t mte; + uint64_t mte_frac; + int idx, err; + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + if (!mte) { + ksft_test_result_skip("MTE capability not supported, nothing to test\n"); + return; + } + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to 
test\n"); + return; + } + + /* + * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2) + * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported + * and MTE_frac == 0 indicates it is supported. + * + * As MTE_frac was previously unconditionally read as 0, check + * that the set to 0 succeeds but does not change MTE_frac + * from unsupported (0xF) to supported (0). + * + */ + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); + if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || + mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { + ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); + return; + } + + /* Try to set MTE_frac=0. */ + val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) { + ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n"); + return; + } + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); + if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); + else + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; @@ -469,10 +673,10 @@ static void test_clidr(struct kvm_vcpu *vcpu) uint64_t clidr; int level; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr); + clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); /* find the first empty level in the cache hierarchy */ - for (level = 1; level < 7; level++) { + for (level = 1; level <= 7; level++) { if (!CLIDR_CTYPE(clidr, level)) break; } @@ -490,17 +694,44 @@ static void test_clidr(struct kvm_vcpu *vcpu) test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; } -static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +static void test_ctr(struct kvm_vcpu *vcpu) +{ + 
u64 ctr; + + ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); + ctr &= ~CTR_EL0_DIC_MASK; + if (ctr & CTR_EL0_IminLine_MASK) + ctr--; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr); + test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; +} + +static void test_id_reg(struct kvm_vcpu *vcpu, u32 id) { u64 val; + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id)); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val); + test_reg_vals[encoding_to_range_idx(id)] = val; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ test_clidr(vcpu); + test_ctr(vcpu); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); - val++; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); + test_id_reg(vcpu, SYS_MPIDR_EL1); + ksft_test_result_pass("%s\n", __func__); +} + +static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + test_id_reg(vcpu, SYS_MIDR_EL1); + test_id_reg(vcpu, SYS_REVIDR_EL1); + test_id_reg(vcpu, SYS_AIDR_EL1); - test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; ksft_test_result_pass("%s\n", __func__); } @@ -509,7 +740,7 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin size_t idx = encoding_to_range_idx(encoding); uint64_t observed; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed); + observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); TEST_ASSERT_EQ(test_reg_vals[idx], observed); } @@ -524,7 +755,12 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) for (int i = 0; i < ARRAY_SIZE(test_regs); i++) test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); + test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1); ksft_test_result_pass("%s\n", __func__); } @@ -535,30 +771,37 @@ int 
main(void) struct kvm_vm *vm; bool aarch64_only; uint64_t val, el0; - int test_cnt; + int test_cnt, i, j; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); + + test_wants_mte(); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); + vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); /* Check for AARCH64 only system */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); - aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); + aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); ksft_print_header(); - test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs) + 2; + test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + for (i = 0; i < ARRAY_SIZE(test_regs); i++) + for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++) + test_cnt++; ksft_set_plan(test_cnt); test_vm_ftr_id_regs(vcpu, aarch64_only); test_vcpu_ftr_id_regs(vcpu); + test_vcpu_non_ftr_id_regs(vcpu); + test_user_set_mpam_reg(vcpu); + test_user_set_mte_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 2d189f3da228..1763b9d45400 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -22,8 +22,20 @@ enum smccc_conduit { SMC_INSN, }; +static bool 
test_runs_at_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + struct kvm_vcpu_init init; + + kvm_get_default_vcpu_target(vm, &init); + kvm_vm_free(vm); + + return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + #define for_each_conduit(conduit) \ - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ + conduit <= SMC_INSN; conduit++) static void guest_main(uint32_t func_id, enum smccc_conduit conduit) { @@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) struct kvm_vm *vm; vm = vm_create(1); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); /* * Enable in-kernel emulation of PSCI to ensure that calls are denied @@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c index 80b74c6f152b..80b74c6f152b 100644 --- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c +++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..8d6d3a4ae4db 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -9,17 +9,18 @@ #include <asm/kvm.h> #include <asm/kvm_para.h> +#include <arm64/gic_v3.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -675,6 +676,44 @@ static void 
test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). 
*/ @@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. 
+ * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... + */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return 0; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static 
int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + kvm_vm_free(vm); +} + void run_tests(uint32_t gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); @@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); + test_v3_nassgicap(); } } @@ -739,6 +994,8 @@ int main(int ac, char **av) int pa_bits; int cnt_impl = 0; + test_disable_default_vgic(); + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; max_phys_size = 1ULL << pa_bits; diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index 
a51dbd2a5f84..2fb2c7939fe9 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -29,6 +29,7 @@ struct test_args { bool level_sensitive; /* 1 is level, 0 is edge */ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ + uint32_t shared_data; }; /* @@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, do { \ uint32_t _intid; \ _intid = gic_get_and_ack_irq(); \ - GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ + GUEST_ASSERT(_intid == IAR_SPURIOUS); \ } while (0) #define CAT_HELPER(a, b) a ## b @@ -269,13 +270,12 @@ static void guest_inject(struct test_args *args, KVM_INJECT_MULTI(cmd, first_intid, num); while (irq_handled < num) { - asm volatile("wfi\n" - "msr daifclr, #2\n" - /* handle IRQ */ - "msr daifset, #2\n" - : : : "memory"); + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); } - asm volatile("msr daifclr, #2" : : : "memory"); + local_irq_enable(); GUEST_ASSERT_EQ(irq_handled, num); for (i = first_intid; i < num + first_intid; i++) @@ -360,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void) * interrupts for the whole test. */ static void test_inject_preemption(struct test_args *args, - uint32_t first_intid, int num, - kvm_inject_cmd cmd) + uint32_t first_intid, int num, + const unsigned long *exclude, + kvm_inject_cmd cmd) { uint32_t intid, prio, step = KVM_PRIO_STEPS; int i; @@ -380,6 +381,10 @@ static void test_inject_preemption(struct test_args *args, for (i = 0; i < num; i++) { uint32_t tmp; intid = i + first_intid; + + if (exclude && test_bit(i, exclude)) + continue; + KVM_INJECT(cmd, intid); /* Each successive IRQ will preempt the previous one. */ tmp = wait_for_and_activate_irq(); @@ -391,15 +396,33 @@ static void test_inject_preemption(struct test_args *args, /* finish handling the IRQs starting with the highest priority one. 
*/ for (i = 0; i < num; i++) { intid = num - i - 1 + first_intid; + + if (exclude && test_bit(intid - first_intid, exclude)) + continue; + gic_set_eoi(intid); - if (args->eoi_split) - gic_set_dir(intid); + } + + if (args->eoi_split) { + for (i = 0; i < num; i++) { + intid = i + first_intid; + + if (exclude && test_bit(i, exclude)) + continue; + + if (args->eoi_split) + gic_set_dir(intid); + } } local_irq_enable(); - for (i = 0; i < num; i++) + for (i = 0; i < num; i++) { + if (exclude && test_bit(i, exclude)) + continue; + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + } GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); GUEST_ASSERT_IAR_EMPTY(); @@ -437,33 +460,32 @@ static void test_injection_failure(struct test_args *args, static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) { - /* - * Test up to 4 levels of preemption. The reason is that KVM doesn't - * currently implement the ability to have more than the number-of-LRs - * number of concurrently active IRQs. The number of LRs implemented is - * IMPLEMENTATION DEFINED, however, it seems that most implement 4. - */ + /* Timer PPIs cannot be injected from userspace */ + static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) | + BIT(30 - MIN_PPI) | + BIT(28 - MIN_PPI) | + BIT(26 - MIN_PPI)); + if (f->sgi) - test_inject_preemption(args, MIN_SGI, 4, f->cmd); + test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd); if (f->ppi) - test_inject_preemption(args, MIN_PPI, 4, f->cmd); + test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd); if (f->spi) - test_inject_preemption(args, MIN_SPI, 4, f->cmd); + test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd); } static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) { - /* Test up to 4 active IRQs. Same reason as in test_preemption. 
*/ if (f->sgi) - guest_restore_active(args, MIN_SGI, 4, f->cmd); + guest_restore_active(args, MIN_SGI, 16, f->cmd); if (f->ppi) - guest_restore_active(args, MIN_PPI, 4, f->cmd); + guest_restore_active(args, MIN_PPI, 16, f->cmd); if (f->spi) - guest_restore_active(args, MIN_SPI, 4, f->cmd); + guest_restore_active(args, MIN_SPI, 31, f->cmd); } static void guest_code(struct test_args *args) @@ -474,12 +496,12 @@ static void guest_code(struct test_args *args) gic_init(GIC_V3, 1); - for (i = 0; i < nr_irqs; i++) - gic_irq_enable(i); - for (i = MIN_SPI; i < nr_irqs; i++) gic_irq_set_config(i, !level_sensitive); + for (i = 0; i < nr_irqs; i++) + gic_irq_enable(i); + gic_set_eoi_split(args->eoi_split); reset_priorities(args); @@ -621,18 +643,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { @@ -643,7 +659,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) - close(fd[f]); + kvm_close(fd[f]); } /* handles the valid case: intid=0xffffffff num=1 */ @@ -759,7 +775,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_args_set(vcpu, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, 
guest_irq_handlers[args.eoi_split][args.level_sensitive]); @@ -787,6 +802,221 @@ done: kvm_vm_free(vm); } +static void guest_code_asym_dir(struct test_args *args, int cpuid) +{ + gic_init(GIC_V3, 2); + + gic_set_eoi_split(1); + gic_set_priority_mask(CPU_PRIO_MASK); + + if (cpuid == 0) { + uint32_t intid; + + local_irq_disable(); + + gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO); + gic_irq_enable(MIN_SPI); + gic_irq_set_pending(MIN_SPI); + + intid = wait_for_and_activate_irq(); + GUEST_ASSERT_EQ(intid, MIN_SPI); + + gic_set_eoi(intid); + isb(); + + WRITE_ONCE(args->shared_data, MIN_SPI); + dsb(ishst); + + do { + dsb(ishld); + } while (READ_ONCE(args->shared_data) == MIN_SPI); + GUEST_ASSERT(!gic_irq_get_active(MIN_SPI)); + } else { + do { + dsb(ishld); + } while (READ_ONCE(args->shared_data) != MIN_SPI); + + gic_set_dir(MIN_SPI); + isb(); + + WRITE_ONCE(args->shared_data, 0); + dsb(ishst); + } + + GUEST_DONE(); +} + +static void guest_code_group_en(struct test_args *args, int cpuid) +{ + uint32_t intid; + + gic_init(GIC_V3, 2); + + gic_set_eoi_split(0); + gic_set_priority_mask(CPU_PRIO_MASK); + /* SGI0 is G0, which is disabled */ + gic_irq_set_group(0, 0); + + /* Configure all SGIs with decreasing priority */ + for (intid = 0; intid < MIN_PPI; intid++) { + gic_set_priority(intid, (intid + 1) * 8); + gic_irq_enable(intid); + gic_irq_set_pending(intid); + } + + /* Ack and EOI all G1 interrupts */ + for (int i = 1; i < MIN_PPI; i++) { + intid = wait_for_and_activate_irq(); + + GUEST_ASSERT(intid < MIN_PPI); + gic_set_eoi(intid); + isb(); + } + + /* + * Check that SGI0 is still pending, inactive, and that we cannot + * ack anything. 
+ */ + GUEST_ASSERT(gic_irq_get_pending(0)); + GUEST_ASSERT(!gic_irq_get_active(0)); + GUEST_ASSERT_IAR_EMPTY(); + GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS); + + /* Open the G0 gates, and verify we can ack SGI0 */ + write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1); + isb(); + + do { + intid = read_sysreg_s(SYS_ICC_IAR0_EL1); + } while (intid == IAR_SPURIOUS); + + GUEST_ASSERT(intid == 0); + GUEST_DONE(); +} + +static void guest_code_timer_spi(struct test_args *args, int cpuid) +{ + uint32_t intid; + u64 val; + + gic_init(GIC_V3, 2); + + gic_set_eoi_split(1); + gic_set_priority_mask(CPU_PRIO_MASK); + + /* Add a pending SPI so that KVM starts trapping DIR */ + gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO); + gic_irq_set_pending(MIN_SPI + cpuid); + + /* Configure the timer with a higher priority, make it pending */ + gic_set_priority(27, IRQ_DEFAULT_PRIO - 8); + + isb(); + val = read_sysreg(cntvct_el0); + write_sysreg(val, cntv_cval_el0); + write_sysreg(1, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(gic_irq_get_pending(27)); + + /* Enable both interrupts */ + gic_irq_enable(MIN_SPI + cpuid); + gic_irq_enable(27); + + /* The timer must fire */ + intid = wait_for_and_activate_irq(); + GUEST_ASSERT(intid == 27); + + /* Check that we can deassert it */ + write_sysreg(0, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(!gic_irq_get_pending(27)); + + /* + * Priority drop, deactivation -- we expect that the host + * deactivation will have been effective + */ + gic_set_eoi(27); + gic_set_dir(27); + + GUEST_ASSERT(!gic_irq_get_active(27)); + + /* Do it one more time */ + isb(); + val = read_sysreg(cntvct_el0); + write_sysreg(val, cntv_cval_el0); + write_sysreg(1, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(gic_irq_get_pending(27)); + + /* The timer must fire again */ + intid = wait_for_and_activate_irq(); + GUEST_ASSERT(intid == 27); + + GUEST_DONE(); +} + +static void *test_vcpu_run(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct ucall uc; + + while (1) { + 
vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + + return NULL; +} + +static void test_vgic_two_cpus(void *gcode) +{ + pthread_t thr[2]; + struct kvm_vcpu *vcpus[2]; + struct test_args args = {}; + struct kvm_vm *vm; + vm_vaddr_t args_gva; + int gic_fd, ret; + + vm = vm_create_with_vcpus(2, gcode, vcpus); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpus[0]); + vcpu_init_descriptor_tables(vcpus[1]); + + /* Setup the guest args page (so it gets the args). */ + args_gva = vm_vaddr_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vcpus[0], 2, args_gva, 0); + vcpu_args_set(vcpus[1], 2, args_gva, 1); + + gic_fd = vgic_v3_setup(vm, 2, 64); + + ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]); + if (ret) + TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret); + ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]); + if (ret) + TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret); + + pthread_join(thr[0], NULL); + pthread_join(thr[1], NULL); + + close(gic_fd); + kvm_vm_free(vm); +} + static void help(const char *name) { printf( @@ -809,6 +1039,9 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; + TEST_REQUIRE(kvm_supports_vgic_v3()); + test_disable_default_vgic(); + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': @@ -840,6 +1073,9 @@ int main(int argc, char **argv) test_vgic(nr_irqs, false /* level */, true /* eoi_split */); test_vgic(nr_irqs, true /* level */, false /* eoi_split */); test_vgic(nr_irqs, true /* level */, true /* eoi_split */); + test_vgic_two_cpus(guest_code_asym_dir); + test_vgic_two_cpus(guest_code_group_en); + test_vgic_two_cpus(guest_code_timer_spi); } else { test_vgic(nr_irqs, level_sensitive, eoi_split); } diff --git 
a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index fc4fe52fb6f8..e857a605f577 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -27,7 +27,7 @@ static vm_paddr_t gpa_base; static struct kvm_vm *vm; static struct kvm_vcpu **vcpus; -static int gic_fd, its_fd; +static int its_fd; static struct test_data { bool request_vcpus_stop; @@ -118,11 +118,16 @@ static void guest_setup_gic(void) guest_setup_its_mappings(); guest_invalidate_all_rdists(); + + /* SYNC to ensure ITS setup is complete */ + for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++) + its_send_sync_cmd(test_data.cmdq_base_va, cpuid); } static void guest_code(size_t nr_lpis) { guest_setup_gic(); + local_irq_enable(); GUEST_SYNC(0); @@ -214,9 +219,6 @@ static void setup_test_data(void) static void setup_gic(void) { - gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); - its_fd = vgic_its_setup(vm); } @@ -334,7 +336,7 @@ static void setup_vm(void) { int i; - vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); @@ -355,7 +357,6 @@ static void setup_vm(void) static void destroy_vm(void) { close(its_fd); - close(gic_fd); kvm_vm_free(vm); free(vcpus); } @@ -374,6 +375,8 @@ int main(int argc, char **argv) u32 nr_threads; int c; + TEST_REQUIRE(kvm_supports_vgic_v3()); + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { switch (c) { case 'v': diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index d31b9f64ba14..ae36325c022f 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c 
@@ -28,7 +28,6 @@ struct vpmu_vm { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - int gic_fd; }; static struct vpmu_vm vpmu_vm; @@ -45,11 +44,6 @@ static uint64_t get_pmcr_n(uint64_t pmcr) return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } -static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) -{ - u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); -} - static uint64_t get_counters_mask(uint64_t n) { uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); @@ -300,7 +294,7 @@ static void guest_sync_handler(struct ex_regs *regs) uint64_t esr, ec; esr = read_sysreg(esr_el1); - ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(esr); __GUEST_ASSERT(expected_ec == ec, "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", @@ -338,10 +332,10 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) * Reading/writing the event count/type registers should cause * an UNDEFINED exception. */ - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); /* * The bit corresponding to the (unimplemented) counter in * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. @@ -415,46 +409,37 @@ static void create_vpmu_vm(void *guest_code) .attr = KVM_ARM_VCPU_PMU_V3_IRQ, .addr = (uint64_t)&irq, }; - struct kvm_device_attr init_attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_INIT, - }; /* The test creates the vpmu_vm multiple times. 
Ensure a clean state */ memset(&vpmu_vm, 0, sizeof(vpmu_vm)); vpmu_vm.vm = vm_create(1); vm_init_descriptor_tables(vpmu_vm.vm); - for (ec = 0; ec < ESR_EC_NUM; ec++) { + for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, guest_sync_handler); } /* Create vCPU with PMUv3 */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); - __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, - "Failed to create vgic-v3, skipping"); + + kvm_arch_vm_finalize_vcpus(vpmu_vm.vm); /* Make sure that PMUv3 support is indicated in the ID register */ - vcpu_get_reg(vpmu_vm.vcpu, - KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0); - pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); + pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); - /* Initialize vPMU */ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); } static void destroy_vpmu_vm(void) { - close(vpmu_vm.gic_fd); kvm_vm_free(vpmu_vm.vm); } @@ -476,33 +461,28 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) } } -static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) +static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail) { struct kvm_vcpu *vcpu; - uint64_t pmcr, pmcr_orig; + unsigned int prev; + int ret; create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig); - pmcr = pmcr_orig; + prev = 
get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0))); - /* - * Setting a larger value of PMCR.N should not modify the field, and - * return a success. - */ - set_pmcr_n(&pmcr, pmcr_n); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters); if (expect_fail) - TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", - pmcr, pmcr_n); + TEST_ASSERT(ret && errno == EINVAL, + "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded", + nr_counters, prev); else - TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)", - pmcr_n, get_pmcr_n(pmcr)); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); + + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL); } /* @@ -517,11 +497,11 @@ static void run_access_test(uint64_t pmcr_n) pr_debug("Test with pmcr_n %lu\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp); + sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1)); run_vcpu(vcpu, pmcr_n); @@ -529,11 +509,11 @@ static void run_access_test(uint64_t pmcr_n) * Reset and re-initialize the vCPU, and run the guest code again to * check if PMCR_EL0.N is preserved. 
*/ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); aarch64_vcpu_setup(vcpu, &init); vcpu_init_descriptor_tables(vcpu); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); run_vcpu(vcpu, pmcr_n); @@ -558,7 +538,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) uint64_t set_reg_id, clr_reg_id, reg_val; uint64_t valid_counters_mask, max_counters_mask; - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; valid_counters_mask = get_counters_mask(pmcr_n); @@ -572,12 +552,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) * Test if the 'set' and 'clr' variants of the registers * are initialized based on the number of valid counters. */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -589,12 +569,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); - 
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val); + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); @@ -612,7 +592,7 @@ static void run_error_test(uint64_t pmcr_n) { pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); + test_create_vpmu_vm_with_nr_counters(pmcr_n, true); destroy_vpmu_vm(); } @@ -625,16 +605,30 @@ static uint64_t get_pmcr_n_limit(void) uint64_t pmcr; create_vpmu_vm(guest_code); - vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); destroy_vpmu_vm(); return get_pmcr_n(pmcr); } +static bool kvm_supports_nr_counters_attr(void) +{ + bool supported; + + create_vpmu_vm(NULL); + supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS); + destroy_vpmu_vm(); + + return supported; +} + int main(void) { uint64_t i, pmcr_n; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + TEST_REQUIRE(kvm_supports_vgic_v3()); + TEST_REQUIRE(kvm_supports_nr_counters_attr()); pmcr_n = get_pmcr_n_limit(); for (i = 0; i <= pmcr_n; i++) { diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c new file mode 100644 index 000000000000..60cb25454899 --- /dev/null +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/sizes.h> + +#include <kvm_util.h> +#include <processor.h> + +#include "ucall_common.h" + +struct kvm_coalesced_io { + struct kvm_coalesced_mmio_ring *ring; + uint32_t ring_size; + uint64_t mmio_gpa; + uint64_t *mmio; + + /* + * x86-only, but define 
pio_port for all architectures to minimize the + * amount of #ifdeffery and complexity, without having to sacrifice + * verbose error messages. + */ + uint8_t pio_port; +}; + +static struct kvm_coalesced_io kvm_builtin_io_ring; + +#ifdef __x86_64__ +static const int has_pio = 1; +#else +static const int has_pio = 0; +#endif + +static void guest_code(struct kvm_coalesced_io *io) +{ + int i, j; + + for (;;) { + for (j = 0; j < 1 + has_pio; j++) { + /* + * KVM always leaves one free entry, i.e. exits to + * userspace before the last entry is filled. + */ + for (i = 0; i < io->ring_size - 1; i++) { +#ifdef __x86_64__ + if (i & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } +#ifdef __x86_64__ + if (j & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } + GUEST_SYNC(0); + + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); +#ifdef __x86_64__ + outl(io->pio_port, io->pio_port + i); +#endif + } +} + +static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + const bool want_pio = expected_exit == KVM_EXIT_IO; + struct kvm_coalesced_mmio_ring *ring = io->ring; + struct kvm_run *run = vcpu->run; + uint32_t pio_value; + + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + + vcpu_run(vcpu); + + /* + * Annoyingly, reading PIO data is safe only for PIO exits, otherwise + * data_offset is garbage, e.g. an MMIO gpa. 
+ */ + if (run->exit_reason == KVM_EXIT_IO) + pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + else + pio_value = 0; + + TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && + run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && + *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && + run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && + pio_value == io->pio_port + io->ring_size - 1)), + "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n " + "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n " + "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x", + ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO", + want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, + (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, + run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? 
"PIO" : "other", + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); +} + +static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + int i; + + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit); + + TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first, + "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u", + ring->first, ring->last, io->ring_size, ring_start); + + for (i = 0; i < io->ring_size - 1; i++) { + uint32_t idx = (ring->first + i) % io->ring_size; + struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; + +#ifdef __x86_64__ + if (i & 1) + TEST_ASSERT(entry->phys_addr == io->pio_port && + entry->len == 4 && entry->pio && + *(uint32_t *)entry->data == io->pio_port + i, + "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", + io->pio_port, io->pio_port + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint32_t *)entry->data); + else +#endif + TEST_ASSERT(entry->phys_addr == io->mmio_gpa && + entry->len == 8 && !entry->pio, + "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", + io->mmio_gpa, io->mmio_gpa + i, i, + entry->len, entry->pio ? 
"PIO" : "MMIO", + entry->phys_addr, *(uint64_t *)entry->data); + } +} + +static void test_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, uint32_t ring_start) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + + kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); +#ifdef __x86_64__ + kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); +#endif + + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO); +#ifdef __x86_64__ + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO); +#endif + + /* + * Verify ucall, which may use non-coalesced MMIO or PIO, generates an + * immediate exit. + */ + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(ring->first, ring_start); + TEST_ASSERT_EQ(ring->last, ring_start); + + /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO); + +#ifdef __x86_64__ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO); +#endif +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO)); + +#ifdef __x86_64__ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO)); +#endif + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + kvm_builtin_io_ring = (struct kvm_coalesced_io) { + /* + * The I/O ring is a kernel-allocated page whose address is + * relative to each vCPU's run page, with the page offset + * provided by KVM in the return of KVM_CAP_COALESCED_MMIO. 
+ */ + .ring = (void *)vcpu->run + + (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()), + + /* + * The size of the I/O ring is fixed, but KVM defines the size + * based on the kernel's PAGE_SIZE. Thus, userspace must query + * the host's page size at runtime to compute the ring size. + */ + .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) / + sizeof(struct kvm_coalesced_mmio), + + /* + * Arbitrary address+port (MMIO mustn't overlap memslots), with + * the MMIO GPA identity mapped in the guest. + */ + .mmio_gpa = 4ull * SZ_1G, + .mmio = (uint64_t *)(4ull * SZ_1G), + .pio_port = 0x80, + }; + + virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + + sync_global_to_guest(vm, kvm_builtin_io_ring); + vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); + + for (i = 0; i < kvm_builtin_io_ring.ring_size; i++) + test_coalesced_io(vcpu, &kvm_builtin_io_ring, i); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9f24303acb8c..0a1ea1d1e2d8 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -20,38 +20,6 @@ #include "guest_modes.h" #include "ucall_common.h" -#ifdef __aarch64__ -#include "aarch64/vgic.h" - -static int gic_fd; - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ - /* - * The test can still run even if hardware does not support GICv3, as it - * is only an optimization to reduce guest exits. 
- */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ - if (gic_fd > 0) - close(gic_fd); -} - -#else /* __aarch64__ */ - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ -} - -#endif - /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, dirty_log_manual_caps); - arch_setup_vm(vm, nr_vcpus); - /* Start the iterations */ iteration = 0; host_quit = false; @@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } memstress_free_bitmaps(bitmaps, p->slots); - arch_cleanup_vm(vm); memstress_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index aacf80f57439..d58a641b0e6a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -31,15 +31,18 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 - /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ #define TEST_HOST_LOOP_N 32UL /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* + * Ensure the vCPU is able to perform a reasonable number of writes in each + * iteration to provide a lower bound on coverage. 
+ */ +#define TEST_MIN_WRITES_PER_ITERATION 0x100 + /* Dirty bitmaps are always little endian, so we need to swap on big endian */ #if defined(__s390x__) # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) @@ -75,6 +78,8 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; static uint64_t iteration; +static uint64_t nr_writes; +static bool vcpu_stop; /* * Guest physical memory offset of the testing memory slot. @@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static void guest_code(void) { uint64_t addr; - int i; + +#ifdef __s390x__ + uint64_t i; /* * On s390x, all pages of a 1M segment are initially marked as dirty @@ -107,16 +114,19 @@ static void guest_code(void) for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } +#endif while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + while (!READ_ONCE(vcpu_stop)) { addr = guest_test_virt_mem; addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } GUEST_SYNC(1); @@ -133,25 +143,18 @@ static uint64_t host_num_pages; /* For statistics only */ static uint64_t host_dirty_count; static uint64_t host_clear_count; -static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; static sem_t sem_vcpu_cont; -/* - * This is only set by main thread, and only cleared by vcpu thread. It is - * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC - * is the only place that we'll guarantee both "dirty bit" and "dirty data" - * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted - * after setting dirty bit but before the data is written. 
- */ -static atomic_t vcpu_sync_stop_requested; + /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; + /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full; * dirty gfn we've collected, so that if a mismatch of data found later in the * verifying process, we let it pass. */ -static uint64_t dirty_ring_last_page; +static uint64_t dirty_ring_last_page = -1ULL; + +/* + * In addition to the above, it is possible (especially if this + * test is run nested) for the above scenario to repeat multiple times: + * + * The following can happen: + * + * - L1 vCPU: Memory write is logged to PML but not committed. + * + * - L1 test thread: Ignores the write because its last dirty ring entry + * Resets the dirty ring which: + * - Resets the A/D bits in EPT + * - Issues tlb flush (invept), which is intercepted by L0 + * + * - L0: frees the whole nested ept mmu root as the response to invept, + * and thus ensures that when memory write is retried, it will fault again + * + * - L1 vCPU: Same memory write is logged to the PML but not committed again. + * + * - L1 test thread: Ignores the write because its last dirty ring entry (again) + * Resets the dirty ring which: + * - Resets the A/D bits in EPT (again) + * - Issues tlb flush (again) which is intercepted by L0 + * + * ... + * + * N times + * + * - L1 vCPU: Memory write is logged in the PML and then committed. + * Lots of other memory writes are logged and committed. + * ... + * + * - L1 test thread: Sees the memory write along with other memory writes + * in the dirty ring, and since the write is usually not + * the last entry in the dirty-ring and has a very outdated + * iteration, the test fails. 
+ * + * + * Note that this is only possible when the write was the last log entry + * write during iteration N-1, thus remember last iteration last log entry + * and also don't fail when it is reported in the next iteration, together with + * an outdated iteration count. + */ +static uint64_t dirty_ring_prev_iteration_last_page; enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ @@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; -static void vcpu_kick(void) -{ - pthread_kill(vcpu_thread, SIG_IPI); -} - -/* - * In our test we do signal tricks, let's use a better version of - * sem_wait to avoid signal interrupts - */ -static void sem_wait_until(sem_t *sem) -{ - int ret; - - do - ret = sem_wait(sem); - while (ret == -1 && errno == EINTR); -} - static bool clear_log_supported(void) { return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); @@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Should only be called after a GUEST_SYNC */ static void vcpu_handle_sync_stop(void) { - if (atomic_read(&vcpu_sync_stop_requested)) { - /* It means main thread is sleeping waiting */ - atomic_set(&vcpu_sync_stop_requested, false); + if (READ_ONCE(vcpu_stop)) { sem_post(&sem_vcpu_stop); - sem_wait_until(&sem_vcpu_cont); + sem_wait(&sem_vcpu_cont); } } -static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void default_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), - "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); @@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, "%u != %u", cur->slot, slot); TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 
0x%x", cur->offset, num_pages); - //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); @@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, return count; } -static void dirty_ring_wait_vcpu(void) -{ - /* This makes sure that hardware PML cache flushed */ - vcpu_kick(); - sem_wait_until(&sem_vcpu_stop); -} - -static void dirty_ring_continue_vcpu(void) -{ - pr_info("Notifying vcpu to continue\n"); - sem_post(&sem_vcpu_cont); -} - static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx) { - uint32_t count = 0, cleared; - bool continued_vcpu = false; - - dirty_ring_wait_vcpu(); - - if (!dirty_ring_vcpu_ring_full) { - /* - * This is not a ring-full event, it's safe to allow - * vcpu to continue - */ - dirty_ring_continue_vcpu(); - continued_vcpu = true; - } + uint32_t count, cleared; /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), @@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - - if (!continued_vcpu) { - TEST_ASSERT(dirty_ring_vcpu_ring_full, - "Didn't continue vcpu even without ring full"); - dirty_ring_continue_vcpu(); - } - - pr_info("Iteration %ld collected %u pages\n", iteration, count); } -static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; /* A ucall-sync or ring-full event is allowed */ if (get_ucall(vcpu, NULL) == UCALL_SYNC) { - /* We should allow this to continue */ - ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || - (ret == -1 && err == EINTR)) { - /* Update the flag first before pause */ - 
WRITE_ONCE(dirty_ring_vcpu_ring_full, - run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&sem_vcpu_stop); - pr_info("vcpu stops because %s...\n", - dirty_ring_vcpu_ring_full ? - "dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&sem_vcpu_cont); - pr_info("vcpu continues now.\n"); + vcpu_handle_sync_stop(); + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + WRITE_ONCE(dirty_ring_vcpu_ring_full, true); + vcpu_handle_sync_stop(); } else { TEST_ASSERT(false, "Invalid guest sync status: " "exit_reason=%s", @@ -426,7 +407,7 @@ struct log_mode { void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx); /* Hook to call after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); + void (*after_vcpu_run)(struct kvm_vcpu *vcpu); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -449,15 +430,6 @@ struct log_mode { }, }; -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. 
- */ -static unsigned long *host_bmap_track; - static void log_modes_dump(void) { int i; @@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } -static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vcpu, ret, err); + mode->after_vcpu_run(vcpu); } static void *vcpu_worker(void *data) { - int ret; struct kvm_vcpu *vcpu = data; - uint64_t pages_count = 0; - struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) - + sizeof(sigset_t)); - sigset_t *sigset = (sigset_t *) &sigmask->sigset; - /* - * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the - * ioctl to return with -EINTR, but it is still pending and we need - * to accept it with the sigwait. - */ - sigmask->len = 8; - pthread_sigmask(0, NULL, sigset); - sigdelset(sigset, SIG_IPI); - vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask); - - sigemptyset(sigset); - sigaddset(sigset, SIG_IPI); + sem_wait(&sem_vcpu_cont); while (!READ_ONCE(host_quit)) { - /* Clear any existing kick signals */ - pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = __vcpu_run(vcpu); - if (ret == -1 && errno == EINTR) { - int sig = -1; - sigwait(sigset, &sig); - assert(sig == SIG_IPI); - } - log_mode_after_vcpu_run(vcpu, ret, errno); + vcpu_run(vcpu); + log_mode_after_vcpu_run(vcpu); } - pr_info("Dirtied %"PRIu64" pages\n", pages_count); - return NULL; } -static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { + uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0; uint64_t step = vm_num_host_pages(mode, 1); - uint64_t page; - uint64_t *value_ptr; - uint64_t min_iter = 0; 
for (page = 0; page < host_num_pages; page += step) { - value_ptr = host_test_mem + page * host_page_size; - - /* If this is a special page that we were tracking... */ - if (__test_and_clear_bit_le(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit_le(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } + uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size); + bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); - if (__test_and_clear_bit_le(page, bmap)) { - bool matched; - - host_dirty_count++; + /* + * Ensure both bitmaps are cleared, as a page can be written + * multiple times per iteration, i.e. can show up in both + * bitmaps, and the dirty ring is additive, i.e. doesn't purge + * bitmap entries from previous collections. + */ + if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) { + nr_dirty_pages++; /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. + * If the page is dirty, the value written to memory + * should be the current iteration number. */ - matched = (*value_ptr == iteration || - *value_ptr == iteration - 1); - - if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { - if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { - /* - * Short answer: this case is special - * only for dirty ring test where the - * page is the last page before a kvm - * dirty ring full in iteration N-2. - * - * Long answer: Assuming ring size R, - * one possible condition is: - * - * main thr vcpu thr - * -------- -------- - * iter=1 - * write 1 to page 0~(R-1) - * full, vmexit - * collect 0~(R-1) - * kick vcpu - * write 1 to (R-1)~(2R-2) - * full, vmexit - * iter=2 - * collect (R-1)~(2R-2) - * kick vcpu - * write 1 to (2R-2) - * (NOTE!!! 
"1" cached in cpu reg) - * write 2 to (2R-1)~(3R-3) - * full, vmexit - * iter=3 - * collect (2R-2)~(3R-3) - * (here if we read value on page - * "2R-2" is 1, while iter=3!!!) - * - * This however can only happen once per iteration. - */ - min_iter = iteration - 1; + if (val == iteration) + continue; + + if (host_log_mode == LOG_MODE_DIRTY_RING) { + /* + * The last page in the ring from previous + * iteration can be written with the value + * from the previous iteration, as the value to + * be written may be cached in a CPU register. + */ + if (page == dirty_ring_prev_iteration_last_page && + val == iteration - 1) continue; - } else if (page == dirty_ring_last_page) { - /* - * Please refer to comments in - * dirty_ring_last_page. - */ + + /* + * Any value from a previous iteration is legal + * for the last entry, as the write may not yet + * have retired, i.e. the page may hold whatever + * it had before this iteration started. + */ + if (page == dirty_ring_last_page && + val < iteration) continue; - } + } else if (!val && iteration == 1 && bmap0_dirty) { + /* + * When testing get+clear, the dirty bitmap + * starts with all bits set, and so the first + * iteration can observe a "dirty" page that + * was never written, but only in the first + * bitmap (collecting the bitmap also clears + * all dirty pages). + */ + continue; } - TEST_ASSERT(matched, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } else { - host_clear_count++; + nr_clean_pages++; /* * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. 
Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". + * value smaller than the iteration number. */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - __set_bit_le(page, host_bmap_track); - } + TEST_ASSERT(val < iteration, + "Clear page %lu value (%lu) >= iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } } + + pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n", + iteration, nr_dirty_pages, nr_clean_pages, nr_writes); + + host_dirty_count += nr_dirty_pages; + host_clear_count += nr_clean_pages; } static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, @@ -674,6 +585,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -688,7 +600,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - unsigned long *bmap; + unsigned long *bmap[2]; uint32_t ring_buf_idx = 0; int sem_val; @@ -731,12 +643,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem = 
align_down(guest_test_phys_mem, 1 << 20); + + /* + * The workaround in guest_code() to write all pages prior to the first + * iteration isn't compatible with the dirty ring, as the dirty ring + * support relies on the vCPU to actually stop when vcpu_stop is set so + * that the vCPU doesn't hang waiting for the dirty ring to be emptied. + */ + TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING, + "Test needs to be updated to support s390 dirty ring"); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - bmap = bitmap_zalloc(host_num_pages); - host_bmap_track = bitmap_zalloc(host_num_pages); + bmap[0] = bitmap_zalloc(host_num_pages); + bmap[1] = bitmap_zalloc(host_num_pages); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -757,14 +678,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); - /* Start the iterations */ - iteration = 1; - sync_global_to_guest(vm, iteration); - WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; - host_track_next_count = 0; - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + WRITE_ONCE(host_quit, false); /* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. 
@@ -776,21 +692,95 @@ static void run_test(enum vm_guest_mode mode, void *arg) sem_getvalue(&sem_vcpu_cont, &sem_val); TEST_ASSERT_EQ(sem_val, 0); + TEST_ASSERT_EQ(vcpu_stop, false); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - while (iteration < p->iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(p->interval * 1000); - log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages, - &ring_buf_idx); + for (iteration = 1; iteration <= p->iterations; iteration++) { + unsigned long i; + + sync_global_to_guest(vm, iteration); + + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + + dirty_ring_prev_iteration_last_page = dirty_ring_last_page; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + sem_post(&sem_vcpu_cont); + + /* + * Let the vCPU run beyond the configured interval until it has + * performed the minimum number of writes. This verifies the + * guest is making forward progress, e.g. isn't stuck because + * of a KVM bug, and puts a firm floor on test coverage. + */ + for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { + /* + * Sleep in 1ms chunks to keep the interval math simple + * and so that the test doesn't run too far beyond the + * specified interval. + */ + usleep(1000); + + sync_global_from_guest(vm, nr_writes); + + /* + * Reap dirty pages while the guest is running so that + * dirty ring full events are resolved, i.e. so that a + * larger interval doesn't always end up with a vCPU + * that's effectively blocked. Collecting while the + * guest is running also verifies KVM doesn't lose any + * state. + * + * For bitmap modes, KVM overwrites the entire bitmap, + * i.e. collecting the bitmaps is destructive. Collect + * the bitmap only on the first pass, otherwise this + * test would lose track of dirty pages. 
+ */ + if (i && host_log_mode != LOG_MODE_DIRTY_RING) + continue; + + /* + * For the dirty ring, empty the ring on subsequent + * passes only if the ring was filled at least once, + * to verify KVM's handling of a full ring (emptying + * the ring on every pass would make it unlikely the + * vCPU would ever fill the ring). + */ + if (i && !READ_ONCE(dirty_ring_vcpu_ring_full)) + continue; + + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[0], host_num_pages, + &ring_buf_idx); + } + + /* + * Stop the vCPU prior to collecting and verifying the dirty + * log. If the vCPU is allowed to run during collection, then + * pages that are written during this iteration may be missed, + * i.e. collected in the next iteration. And if the vCPU is + * writing memory during verification, pages that this thread + * sees as clean may be written with this iteration's value. + */ + WRITE_ONCE(vcpu_stop, true); + sync_global_to_guest(vm, vcpu_stop); + sem_wait(&sem_vcpu_stop); /* - * See vcpu_sync_stop_requested definition for details on why - * we need to stop vcpu when verify data. + * Clear vcpu_stop after the vCPU thread has acknowledged the + * stop request and is waiting, i.e. is definitely not running! */ - atomic_set(&vcpu_sync_stop_requested, true); - sem_wait_until(&sem_vcpu_stop); + WRITE_ONCE(vcpu_stop, false); + sync_global_to_guest(vm, vcpu_stop); + + /* + * Sync the number of writes performed before verification, the + * info will be printed along with the dirty/clean page counts. + */ + sync_global_from_guest(vm, nr_writes); + /* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; @@ -798,32 +788,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) * the flush of the last page, and since we handle the last * page specially verification will succeed anyway. 
*/ - assert(host_log_mode == LOG_MODE_DIRTY_RING || - atomic_read(&vcpu_sync_stop_requested) == false); + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[1], host_num_pages, + &ring_buf_idx); vm_dirty_log_verify(mode, bmap); - - /* - * Set host_quit before sem_vcpu_cont in the final iteration to - * ensure that the vCPU worker doesn't resume the guest. As - * above, the dirty ring test may stop and wait even when not - * explicitly request to do so, i.e. would hang waiting for a - * "continue" if it's allowed to resume the guest. - */ - if (++iteration == p->iterations) - WRITE_ONCE(host_quit, true); - - sem_post(&sem_vcpu_cont); - sync_global_to_guest(vm, iteration); } + WRITE_ONCE(host_quit, true); + sem_post(&sem_vcpu_cont); + pthread_join(vcpu_thread, NULL); - pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", + host_dirty_count, host_clear_count); - free(bmap); - free(host_bmap_track); + free(bmap[0]); + free(bmap[1]); kvm_vm_free(vm); } @@ -857,7 +837,6 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; - sigset_t sigset; sem_init(&sem_vcpu_stop, 0, 0); sem_init(&sem_vcpu_cont, 0, 0); @@ -908,19 +887,12 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero"); TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", p.iterations, p.interval); - srandom(time(0)); - - /* Ensure that vCPU threads start with SIG_IPI blocked. 
*/ - sigemptyset(&sigset); - sigaddset(&sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, &sigset, NULL); - if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 91f05f78e824..f4644c9d2d3b 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) } #ifdef __aarch64__ -static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) +static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c, + struct kvm_vcpu_init *init) { struct vcpu_reg_sublist *s; + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init); + for_each_sublist(c, s) if (s->capability) init->features[s->feature / 32] |= 1 << (s->feature % 32); @@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) { - struct kvm_vcpu_init init = { .target = -1, }; + struct kvm_vcpu_init init; struct kvm_vcpu *vcpu; - prepare_vcpu_init(c, &init); + prepare_vcpu_init(vm, c, &init); vcpu = __vm_vcpu_add(vm, 0); aarch64_vcpu_setup(vcpu, &init); diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ba0c8e996035..618c937f3c90 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -13,14 +13,19 @@ #include <linux/bitmap.h> #include <linux/falloc.h> +#include <linux/sizes.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> #include "kvm_util.h" +#include "numaif.h" #include "test_util.h" +#include "ucall_common.h" -static void test_file_read_write(int fd) +static size_t page_size; + +static void test_file_read_write(int fd, size_t 
total_size) { char buf[64]; @@ -34,15 +39,177 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap(int fd, size_t page_size) +static void test_mmap_cow(int fd, size_t size) +{ + void *mem; + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); +} + +static void test_mmap_supported(int fd, size_t total_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + memset(mem, val, total_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, + page_size); + TEST_ASSERT(!ret, "fallocate the first page should succeed."); + + for (i = 0; i < page_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00); + for (; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + memset(mem, val, page_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + kvm_munmap(mem, total_size); +} + +static void test_mbind(int fd, size_t total_size) +{ + const unsigned long nodemask_0 = 1; /* nid: 0 */ + unsigned long nodemask = 0; + unsigned long maxnode = 8; + int policy; + char *mem; + int ret; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + /* Test MPOL_INTERLEAVE policy */ + kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0, + "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_INTERLEAVE, nodemask_0, policy, nodemask); + + /* Test basic MPOL_BIND policy */ + kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, 
&nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0, + "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_BIND, nodemask_0, policy, nodemask); + + /* Test MPOL_DEFAULT policy */ + kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask, + "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx", + MPOL_DEFAULT, policy, nodemask); + + /* Test with invalid policy */ + ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "mbind with invalid policy should fail with EINVAL"); + + kvm_munmap(mem, total_size); +} + +static void test_numa_allocation(int fd, size_t total_size) +{ + unsigned long node0_mask = 1; /* Node 0 */ + unsigned long node1_mask = 2; /* Node 1 */ + unsigned long maxnode = 8; + void *pages[4]; + int status[4]; + char *mem; + int i; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + for (i = 0; i < 4; i++) + pages[i] = (char *)mem + page_size * i; + + /* Set NUMA policy after allocation */ + memset(mem, 0xaa, page_size); + kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size); + + /* Set NUMA policy before allocation */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + /* Validate if pages are allocated on specified NUMA nodes */ + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, 
got it on node %d", status[2]); + TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]); + + /* Punch hole for all pages */ + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size); + + /* Change NUMA policy nodes and reallocate */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]); + + kvm_munmap(mem, total_size); +} + +static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + + mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); + TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size])); + + for (i = 0; i < accessible_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + kvm_munmap(mem, map_size); +} + +static void test_fault_overflow(int fd, size_t total_size) +{ + test_fault_sigbus(fd, total_size, total_size * 4); +} + +static void test_fault_private(int fd, size_t total_size) +{ + test_fault_sigbus(fd, 0, total_size); +} + +static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); TEST_ASSERT_EQ(mem, MAP_FAILED); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); } -static void test_file_size(int fd, size_t page_size, size_t total_size) +static void test_file_size(int fd, size_t 
total_size) { struct stat sb; int ret; @@ -53,7 +220,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(sb.st_blksize, page_size); } -static void test_fallocate(int fd, size_t page_size, size_t total_size) +static void test_fallocate(int fd, size_t total_size) { int ret; @@ -90,7 +257,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); } -static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) +static void test_invalid_punch_hole(int fd, size_t total_size) { struct { off_t offset; @@ -120,26 +287,18 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } } -static void test_create_guest_memfd_invalid(struct kvm_vm *vm) +static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, + uint64_t guest_memfd_flags) { - size_t page_size = getpagesize(); - uint64_t flag; size_t size; int fd; for (size = 1; size < page_size; size++) { - fd = __vm_create_guest_memfd(vm, size, 0); - TEST_ASSERT(fd == -1 && errno == EINVAL, + fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags); + TEST_ASSERT(fd < 0 && errno == EINVAL, "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", size); } - - for (flag = 0; flag; flag <<= 1) { - fd = __vm_create_guest_memfd(vm, page_size, flag); - TEST_ASSERT(fd == -1 && errno == EINVAL, - "guest_memfd() with flag '0x%lx' should fail with EINVAL", - flag); - } } static void test_create_guest_memfd_multiple(struct kvm_vm *vm) @@ -147,53 +306,187 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) int fd1, fd2, ret; struct stat st1, st2; - fd1 = __vm_create_guest_memfd(vm, 4096, 0); + fd1 = __vm_create_guest_memfd(vm, page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "memfd 
st_size should match requested size"); + TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size"); - fd2 = __vm_create_guest_memfd(vm, 8192, 0); + fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0); TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); ret = fstat(fd2, &st2); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); + TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size"); TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); close(fd2); close(fd1); } -int main(int argc, char *argv[]) +static void test_guest_memfd_flags(struct kvm_vm *vm) { - size_t page_size; - size_t total_size; + uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + uint64_t flag; int fd; + + for (flag = BIT(0); flag; flag <<= 1) { + fd = __vm_create_guest_memfd(vm, page_size, flag); + if (flag & valid_flags) { + TEST_ASSERT(fd >= 0, + "guest_memfd() with flag '0x%lx' should succeed", + flag); + close(fd); + } else { + TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + } + } +} + +#define gmem_test(__test, __vm, __flags) \ +do { \ + int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \ + \ + test_##__test(fd, page_size * 4); \ + close(fd); \ +} while (0) + +static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) +{ + test_create_guest_memfd_multiple(vm); + test_create_guest_memfd_invalid_sizes(vm, flags); + + gmem_test(file_read_write, vm, flags); + + if (flags & GUEST_MEMFD_FLAG_MMAP) { + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) 
{ + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); + gmem_test(numa_allocation, vm, flags); + } else { + gmem_test(fault_private, vm, flags); + } + + gmem_test(mmap_cow, vm, flags); + gmem_test(mbind, vm, flags); + } else { + gmem_test(mmap_not_supported, vm, flags); + } + + gmem_test(file_size, vm, flags); + gmem_test(fallocate, vm, flags); + gmem_test(invalid_punch_hole, vm, flags); +} + +static void test_guest_memfd(unsigned long vm_type) +{ + struct kvm_vm *vm = vm_create_barebones_type(vm_type); + uint64_t flags; + + test_guest_memfd_flags(vm); + + __test_guest_memfd(vm, 0); + + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (flags & GUEST_MEMFD_FLAG_MMAP) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP); + + /* MMAP should always be supported if INIT_SHARED is supported. */ + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + + kvm_vm_free(vm); +} + +static void guest_code(uint8_t *mem, uint64_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + __GUEST_ASSERT(mem[i] == 0xaa, + "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]); + + memset(mem, 0xff, size); + GUEST_DONE(); +} + +static void test_guest_memfd_guest(void) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables, and low memory contains + * the PCI hole and other MMIO regions that need to be avoided. 
+ */ + const uint64_t gpa = SZ_4G; + const int slot = 1; + + struct kvm_vcpu *vcpu; struct kvm_vm *vm; + uint8_t *mem; + size_t size; + int fd, i; - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS)) + return; - page_size = getpagesize(); - total_size = page_size * 4; + vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); - vm = vm_create_barebones(); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, + "Default VM type should support MMAP, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED, + "Default VM type should support INIT_SHARED, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); - test_create_guest_memfd_invalid(vm); - test_create_guest_memfd_multiple(vm); + size = vm->page_size; + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); + + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + memset(mem, 0xaa, size); + kvm_munmap(mem, size); - fd = vm_create_guest_memfd(vm, total_size, 0); + virt_pg_map(vm, gpa, gpa); + vcpu_args_set(vcpu, 2, gpa, size); + vcpu_run(vcpu); - test_file_read_write(fd); - test_mmap(fd, page_size); - test_file_size(fd, page_size, total_size); - test_fallocate(fd, page_size, total_size); - test_invalid_punch_hole(fd, page_size, total_size); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + for (i = 0; i < size; i++) + TEST_ASSERT_EQ(mem[i], 0xff); close(fd); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + unsigned long vm_types, vm_type; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + page_size = getpagesize(); + + /* + * Not all architectures support KVM_CAP_VM_TYPES. 
However, those that + * support guest_memfd have that support for the default VM type. + */ + vm_types = kvm_check_cap(KVM_CAP_VM_TYPES); + if (!vm_types) + vm_types = BIT(VM_TYPE_DEFAULT); + + for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types)) + test_guest_memfd(vm_type); + + test_guest_memfd_guest(); } diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 8092c2d0f5d6..bcf582852db9 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) expected_assert_msg, &assert_msg[offset]); } +/* + * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional + * guest asserts can be verified instead of being reported as failures. + */ +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + int r; + + do { + r = __vcpu_run(vcpu); + } while (r == -1 && errno == EINTR); + + TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r)); +} + static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, const char *expected_assert) { @@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, struct ucall uc; while (1) { - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", @@ -159,7 +174,7 @@ static void test_limits(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); run = vcpu->run; - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index bce73bcb973c..94bd6ed24cf3 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -20,7 +20,6 @@ #define SLEEPING_THREAD_NUM (1 <<
4) #define FORK_NUM (1ULL << 9) #define DELAY_US_MAX 2000 -#define GUEST_CODE_PIO_PORT 4 sem_t *sem; diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index b3e97525cb55..e2c4e9f0010f 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) return 0; } -static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) +static inline void timer_set_tval(enum arch_timer timer, int32_t tval) { switch (timer) { case VIRTUAL: @@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) isb(); } +static inline int32_t timer_get_tval(enum arch_timer timer) +{ + isb(); + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_tval_el0); + case PHYSICAL: + return read_sysreg(cntp_tval_el0); + default: + GUEST_FAIL("Could not get timer %d\n", timer); + } + + /* We should not reach here */ + return 0; +} + static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) { switch (timer) { @@ -139,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) timer_set_tval(timer, msec_to_cycles(msec)); } +static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER : + KVM_ARM_VCPU_TIMER_IRQ_VTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + +static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? 
KVM_ARM_VCPU_TIMER_IRQ_HPTIMER : + KVM_ARM_VCPU_TIMER_IRQ_PTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + #endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h index 329e4f5079ea..329e4f5079ea 100644 --- a/tools/testing/selftests/kvm/include/aarch64/delay.h +++ b/tools/testing/selftests/kvm/include/arm64/delay.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index baeb3c859389..cc7a7f34ed37 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid); void gic_irq_clear_pending(unsigned int intid); bool gic_irq_get_pending(unsigned int intid); void gic_irq_set_config(unsigned int intid, bool is_edge); +void gic_irq_set_group(unsigned int intid, bool group); void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, vm_paddr_t pend_table); diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h index a76615fa39a1..a76615fa39a1 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h index 3722ed9c8f96..58feef3eb386 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h @@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, u32 collection_id, u32 intid); void its_send_invall_cmd(void *cmdq_base, u32 collection_id); +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id); #endif // 
__SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h new file mode 100644 index 000000000000..b973bb2c64a6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch { + bool has_gic; + int gic_fd; +}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 9b20a355d81a..ff928716574d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -12,6 +12,8 @@ #include <linux/stringify.h> #include <linux/types.h> +#include <asm/brk-imm.h> +#include <asm/esr.h> #include <asm/sysreg.h> @@ -60,6 +62,67 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define 
TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -100,25 +163,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -#define ESR_EC_NUM 64 -#define ESR_EC_SHIFT 26 -#define ESR_EC_MASK (ESR_EC_NUM - 1) - -#define ESR_EC_UNKNOWN 0x0 -#define ESR_EC_SVC64 0x15 -#define ESR_EC_IABT 0x21 -#define ESR_EC_DABT 0x25 -#define ESR_EC_HW_BP_CURRENT 0x31 -#define ESR_EC_SSTEP_CURRENT 0x33 -#define ESR_EC_WP_CURRENT 0x35 -#define ESR_EC_BRK_INS 0x3c - -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); @@ -131,6 
+175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); static inline void cpu_relax(void) @@ -210,6 +255,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 @@ -243,4 +298,90 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +/* Execute a Wait For Interrupt instruction. */ +void wfi(void); + +void test_wants_mte(void); +void test_disable_default_vgic(void); + +bool vm_supports_el2(struct kvm_vm *vm); + +static inline bool test_supports_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + bool supported = vm_supports_el2(vm); + + kvm_vm_free(vm); + return supported; +} + +static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu) +{ + return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + +#define MAPPED_EL2_SYSREG(el2, el1) \ + case SYS_##el1: \ + if (vcpu_has_el2(vcpu)) \ + alias = SYS_##el2; \ + break + + +static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding) +{ + u32 alias = encoding; + + BUILD_BUG_ON(!__builtin_constant_p(encoding)); + + switch (encoding) { + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1); + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1); + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1); + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1); + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1); + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1); + 
MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1); + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1); + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1); + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1); + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1); + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1); + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1); + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1); + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1); + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1); + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1); + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1); + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1); + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1); + MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1); + case SYS_SP_EL1: + if (!vcpu_has_el2(vcpu)) + return ARM64_CORE_REG(sp_el1); + + alias = SYS_SP_EL2; + break; + default: + BUILD_BUG(); + } + + return KVM_ARM64_SYS_REG(alias); +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init); + +static inline unsigned int get_current_el(void) +{ + return (read_sysreg(CurrentEL) >> 2) & 0x3; +} + +#define do_smccc(...) 
\ +do { \ + if (get_current_el() == 2) \ + smccc_smc(__VA_ARGS__); \ + else \ + smccc_hvc(__VA_ARGS__); \ +} while (0) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h index cf0984106d14..cf0984106d14 100644 --- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h +++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h index 4ec801f37f00..4ec801f37f00 100644 --- a/tools/testing/selftests/kvm/include/aarch64/ucall.h +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index c481d0c00a5d..688beccc9436 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -16,6 +16,9 @@ ((uint64_t)(flags) << 12) | \ index) +bool kvm_supports_vgic_v3(void); +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +void __vgic_v3_init(int fd); int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); #define VGIC_MAX_RESERVED 1023 diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h new file mode 100644 index 000000000000..d4e613162bba --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_SYSCALLS_H +#define SELFTEST_KVM_SYSCALLS_H + +#include <sys/syscall.h> + +#define MAP_ARGS0(m,...) +#define MAP_ARGS1(m,t,a,...) m(t,a) +#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__) +#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__) +#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__) +#define MAP_ARGS5(m,t,a,...) 
m(t,a), MAP_ARGS4(m,__VA_ARGS__) +#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__) +#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__) + +#define __DECLARE_ARGS(t, a) t a +#define __UNPACK_ARGS(t, a) a + +#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args) +#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args) + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +/* Define a kvm_<syscall>() API to assert success. */ +#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \ +{ \ + int r; \ + \ + r = name(UNPACK_ARGS(nr_args, args)); \ + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \ +} + +/* + * Macro to define syscall APIs, either because KVM selftests doesn't link to + * the standard library, e.g. libnuma, or because there is no library yet that + * provides the syscall. These wrappers invoke the raw syscall via syscall(). + */ +#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline long name(DECLARE_ARGS(nr_args, args)) \ +{ \ + return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \ +} \ +__KVM_SYSCALL_DEFINE(name, nr_args, args) + +/* + * Special case mmap(), as KVM selftests rarely/never specify an address, + * rarely specify an offset, and because the unique return code requires + * special handling anyways.
+ */ +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline int kvm_dup(int fd) +{ + int new_fd = dup(fd); + + TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd)); + return new_fd; +} + +__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size); +__KVM_SYSCALL_DEFINE(close, 1, int, fd); +__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len); +__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length); + +#endif /* SELFTEST_KVM_SYSCALLS_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 63c2aaae51f3..81f4355ff28a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -18,8 +18,12 @@ #include <asm/atomic.h> #include <asm/kvm.h> +#include <sys/eventfd.h> #include <sys/ioctl.h> +#include <pthread.h> + +#include "kvm_syscalls.h" #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -46,6 +50,12 @@ struct userspace_mem_region { struct hlist_node slot_node; }; +struct kvm_binary_stats { + int fd; + struct kvm_stats_header header; + struct kvm_stats_desc *desc; +}; + struct kvm_vcpu { struct list_head list; uint32_t id; @@ -55,6 +65,10 @@ struct kvm_vcpu { #ifdef __x86_64__ struct kvm_cpuid2 *cpuid; #endif +#ifdef __aarch64__ + struct kvm_vcpu_init init; +#endif + struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; uint32_t fetch_index; uint32_t dirty_gfns_count; @@ -99,10 +113,7 @@ struct kvm_vm { struct kvm_vm_arch arch; - /* Cache of information for binary stats interface */ - int stats_fd; - 
struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; + struct kvm_binary_stats stats; /* * KVM region slots. These are the default memslots used by page @@ -167,12 +178,13 @@ enum vm_guest_mode { VM_MODE_P40V48_4K, VM_MODE_P40V48_16K, VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */ VM_MODE_P47V64_4K, VM_MODE_P44V64_4K, VM_MODE_P36V48_4K, VM_MODE_P36V48_16K, VM_MODE_P36V48_64K, + VM_MODE_P47V47_16K, VM_MODE_P36V47_16K, NUM_VM_MODES, }; @@ -208,7 +220,7 @@ extern enum vm_guest_mode vm_mode_default; #elif defined(__x86_64__) -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) @@ -228,6 +240,11 @@ extern enum vm_guest_mode vm_mode_default; #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) +#elif defined(__loongarch__) +#define VM_MODE_DEFAULT VM_MODE_P47V47_16K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + #endif #define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) @@ -243,16 +260,22 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); +int kvm_get_module_param_integer(const char *module_name, const char *param); +bool kvm_get_module_param_bool(const char *module_name, const char *param); + +static inline bool get_kvm_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm", param); +} -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int 
get_kvm_amd_param_integer(const char *param); +static inline int get_kvm_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm", param); +} unsigned int kvm_check_cap(long cap); @@ -261,9 +284,6 @@ static inline bool kvm_has_cap(long cap) return kvm_check_cap(cap); } -#define __KVM_SYSCALL_ERROR(_name, _ret) \ - "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) - /* * Use the "inner", double-underscore macro when reporting errors from within * other macros so that the name of ioctl() and not its literal numeric value @@ -428,8 +448,6 @@ const char *vm_guest_mode_string(uint32_t i); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp); void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); @@ -460,6 +478,32 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); } +static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone); +} + +static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone); +} + static inline int vm_get_stats_fd(struct kvm_vm *vm) { int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); @@ -468,6 +512,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } +static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = 
gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) { ssize_t ret; @@ -507,15 +590,53 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, struct kvm_stats_desc *desc, uint64_t *data, size_t max_elements); -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements); + +#define __get_stat(stats, stat) \ +({ \ + uint64_t data; \ + \ + kvm_get_stat(stats, #stat, &data, 1); \ + data; \ +}) + +#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat) +#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat) + +static inline bool read_smt_control(char *buf, size_t buf_size) +{ + FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r"); + bool ret; + + if (!f) + return false; -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) + ret = fread(buf, sizeof(*buf), buf_size, f) > 0; + fclose(f); + + return ret; +} + +static inline bool is_smt_possible(void) { - uint64_t data; + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && + (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12))) + return false; 
- __vm_get_stat(vm, stat_name, &data, 1); - return data; + return true; +} + +static inline bool is_smt_on(void) +{ + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2)) + return true; + + return false; } void vm_create_irqchip(struct kvm_vm *vm); @@ -552,12 +673,12 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag uint32_t guest_memfd, uint64_t guest_memfd_offset); void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); + enum vm_mem_backing_src_type src_type, + uint64_t gpa, uint32_t slot, uint64_t npages, + uint32_t flags); void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); + uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, + int guest_memfd_fd, uint64_t guest_memfd_offset); #ifndef vm_arch_has_protected_memory static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) @@ -567,6 +688,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) #endif void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); @@ -678,16 +800,22 @@ static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t va return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg); } -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + uint64_t val; + struct kvm_one_reg reg = { .id = id, 
.addr = (uint64_t)&val }; + + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg); + return val; } static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) { struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg); } @@ -933,7 +1061,36 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_set_files_rlimit(uint32_t nr_vcpus); + +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus); @@ -1047,6 +1204,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { virt_arch_pg_map(vm, vaddr, paddr); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } @@ -1107,10 +1265,14 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) */ void kvm_selftest_arch_init(void); -void kvm_arch_vm_post_create(struct kvm_vm *vm); +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); +void kvm_arch_vm_release(struct kvm_vm 
*vm); bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); uint32_t guest_get_vcpuid(void); +bool kvm_arch_has_default_irqchip(void); + #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h new file mode 100644 index 000000000000..2ed106b32c81 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch Constant Timer specific interface + */ +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include "processor.h" + +/* LoongArch timer frequency is constant 100MHZ */ +#define TIMER_FREQ (100UL << 20) +#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000) +#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000) +#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ) + +static inline unsigned long timer_get_cycles(void) +{ + unsigned long val = 0; + + __asm__ __volatile__( + "rdtime.d %0, $zero\n\t" + : "=r"(val) + : + ); + + return val; +} + +static inline unsigned long timer_get_cfg(void) +{ + return csr_read(LOONGARCH_CSR_TCFG); +} + +static inline unsigned long timer_get_val(void) +{ + return csr_read(LOONGARCH_CSR_TVAL); +} + +static inline void disable_timer(void) +{ + csr_write(0, LOONGARCH_CSR_TCFG); +} + +static inline void timer_irq_enable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val |= ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_irq_disable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val &= ~ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_set_next_cmp_ms(unsigned int msec, bool period) +{ + unsigned long val; + + val = msec_to_cycles(msec) & CSR_TCFG_VAL; + val |= CSR_TCFG_EN; + if (period) + val |= CSR_TCFG_PERIOD; + csr_write(val, 
LOONGARCH_CSR_TCFG); +} + +static inline void __delay(uint64_t cycles) +{ + uint64_t start = timer_get_cycles(); + + while ((timer_get_cycles() - start) < cycles) + cpu_relax(); +} + +static inline void udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h index e43a57d99b56..e43a57d99b56 100644 --- a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h new file mode 100644 index 000000000000..76840ddda57d --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#ifndef __ASSEMBLER__ +#include "ucall_common.h" + +#else +/* general registers */ +#define zero $r0 +#define ra $r1 +#define tp $r2 +#define sp $r3 +#define a0 $r4 +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 +#define s0 $r23 +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 +#endif + +/* + * LoongArch page table entry definition + * Original header file arch/loongarch/include/asm/loongarch.h + */ +#define _PAGE_VALID_SHIFT 0 +#define _PAGE_DIRTY_SHIFT 1 +#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */ +#define PLV_KERN 0 +#define PLV_USER 3 +#define PLV_MASK 0x3 +#define _CACHE_SHIFT 4 /* 4~5, two bits */ +#define 
_PAGE_PRESENT_SHIFT 7 +#define _PAGE_WRITE_SHIFT 8 + +#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT) +#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT) +#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT) +#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT) +#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT) +#define __READABLE (_PAGE_VALID) +#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE) +/* Coherent Cached */ +#define _CACHE_CC BIT_ULL(_CACHE_SHIFT) +#define PS_4K 0x0000000c +#define PS_16K 0x0000000e +#define PS_64K 0x00000010 +#define PS_DEFAULT_SIZE PS_16K + +/* LoongArch Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT) +#define CSR_CRMD_IE_SHIFT 2 +#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT) +#define CSR_CRMD_PLV_SHIFT 0 +#define CSR_CRMD_PLV_WIDTH 2 +#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT) +#define PLV_MASK 0x3 +#define LOONGARCH_CSR_PRMD 0x1 +#define LOONGARCH_CSR_EUEN 0x2 +#define LOONGARCH_CSR_ECFG 0x4 +#define ECFGB_TIMER 11 +#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER)) +#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define CSR_ESTAT_EXC_SHIFT 16 +#define CSR_ESTAT_EXC_WIDTH 6 +#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT) +#define EXCCODE_INT 0 /* Interrupt */ +#define INT_TI 11 /* Timer interrupt*/ +#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ +#define LOONGARCH_CSR_EENTRY 0xc +#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */ +#define CSR_TLBIDX_PS_SHIFT 24 +#define CSR_TLBIDX_PS_WIDTH 6 +#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT) +#define CSR_TLBIDX_SIZEM 0x3f000000 +#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT +#define LOONGARCH_CSR_ASID 0x18 /* ASID */ +#define LOONGARCH_CSR_PGDL 0x19 +#define LOONGARCH_CSR_PGDH 0x1a +/* Page table base */ +#define LOONGARCH_CSR_PGD 0x1b +#define LOONGARCH_CSR_PWCTL0 0x1c +#define LOONGARCH_CSR_PWCTL1 0x1d 
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e +#define LOONGARCH_CSR_CPUID 0x20 +#define LOONGARCH_CSR_KS0 0x30 +#define LOONGARCH_CSR_KS1 0x31 +#define LOONGARCH_CSR_TMID 0x40 +#define LOONGARCH_CSR_TCFG 0x41 +#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2)) +#define CSR_TCFG_PERIOD_SHIFT 1 +#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (0x1UL) +#define LOONGARCH_CSR_TVAL 0x42 +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) +/* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 +#define LOONGARCH_CSR_TLBRSAVE 0x8b +#define LOONGARCH_CSR_TLBREHI 0x8e +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__( \ + "csrrd %[val], %[reg]\n\t" \ + : [val] "=r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define csr_write(v, csr) \ +({ \ + register unsigned long __v = v; \ + __asm__ __volatile__ ( \ + "csrwr %[val], %[reg]\n\t" \ + : [val] "+r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define EXREGS_GPRS (32) + +#ifndef __ASSEMBLER__ +void handle_tlb_refill(void); +void handle_exception(void); + +struct ex_regs { + unsigned long regs[EXREGS_GPRS]; + unsigned long pc; + unsigned long estat; + unsigned long badv; + unsigned long prmd; +}; + +#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc) +#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat) +#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv) +#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd) +#define EXREGS_SIZE sizeof(struct ex_regs) + +#define VECTOR_NUM 64 + +typedef void(*handler_fn)(struct ex_regs *); + +struct handlers { + handler_fn exception_handlers[VECTOR_NUM]; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vm_install_exception_handler(struct kvm_vm *vm, int vector, 
handler_fn handler); + +static inline void cpu_relax(void) +{ + asm volatile("nop" ::: "memory"); +} + +static inline void local_irq_enable(void) +{ + unsigned int flags = CSR_CRMD_IE; + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void local_irq_disable(void) +{ + unsigned int flags = 0; + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} +#else +#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) +#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) +#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8) +#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8) +#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8) +#endif + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h new file mode 100644 index 000000000000..4ec801f37f00 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. 
+ */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h new file mode 100644 index 000000000000..d32ff5d8ffd0 --- /dev/null +++ b/tools/testing/selftests/kvm/include/lru_gen_util.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output. + * + * Copyright (C) 2025, Google LLC. + */ +#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H +#define SELFTEST_KVM_LRU_GEN_UTIL_H + +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> + +#include "test_util.h" + +#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */ +#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */ + +#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen" +#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled" +#define LRU_GEN_ENABLED 1 +#define LRU_GEN_MM_WALK 2 + +struct generation_stats { + int gen; + long age_ms; + long nr_anon; + long nr_file; +}; + +struct node_stats { + int node; + int nr_gens; /* Number of populated gens entries. */ + struct generation_stats gens[MAX_NR_GENS]; +}; + +struct memcg_stats { + unsigned long memcg_id; + int nr_nodes; /* Number of populated nodes entries. 
*/ + struct node_stats nodes[MAX_NR_NODES]; +}; + +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg); +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats); +long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats); +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg); +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long total_pages); +bool lru_gen_usable(void); + +#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h index b020547403fd..29572a6d789c 100644 --- a/tools/testing/selftests/kvm/include/numaif.h +++ b/tools/testing/selftests/kvm/include/numaif.h @@ -1,55 +1,83 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/numaif.h - * - * Copyright (C) 2020, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Header file that provides access to NUMA API functions not explicitly - * exported to user space. - */ +/* Copyright (C) 2020, Google LLC. 
*/ #ifndef SELFTEST_KVM_NUMAIF_H #define SELFTEST_KVM_NUMAIF_H -#define __NR_get_mempolicy 239 -#define __NR_migrate_pages 256 +#include <dirent.h> -/* System calls */ -long get_mempolicy(int *policy, const unsigned long *nmask, - unsigned long maxnode, void *addr, int flags) +#include <linux/mempolicy.h> + +#include "kvm_syscalls.h" + +KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask, + unsigned long, maxnode, void *, addr, int, flags); + +KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask, + unsigned long, maxnode); + +KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start, + unsigned long, len, unsigned long, home_node, + unsigned long, flags); + +KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode, + const unsigned long *, frommask, const unsigned long *, tomask); + +KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages, + const int *, nodes, int *, status, int, flags); + +KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode, + const unsigned long *, nodemask, unsigned long, maxnode, + unsigned int, flags); + +static inline int get_max_numa_node(void) { - return syscall(__NR_get_mempolicy, policy, nmask, - maxnode, addr, flags); + struct dirent *de; + int max_node = 0; + DIR *d; + + /* + * Assume there's a single node if the kernel doesn't support NUMA, + * or if no nodes are found. 
+ */ + d = opendir("/sys/devices/system/node"); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + int node_id; + char *endptr; + + if (strncmp(de->d_name, "node", 4) != 0) + continue; + + node_id = strtol(de->d_name + 4, &endptr, 10); + if (*endptr != '\0') + continue; + + if (node_id > max_node) + max_node = node_id; + } + closedir(d); + + return max_node; } -long migrate_pages(int pid, unsigned long maxnode, - const unsigned long *frommask, - const unsigned long *tomask) +static bool is_numa_available(void) { - return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask); + /* + * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall + * fails with ENOSYS, then the kernel was built without NUMA support. + * If the syscall fails with EPERM, then the process/user lacks the + * necessary capabilities (CAP_SYS_NICE). + */ + return !get_mempolicy(NULL, NULL, 0, NULL, 0) || + (errno != ENOSYS && errno != EPERM); } -/* Policies */ -#define MPOL_DEFAULT 0 -#define MPOL_PREFERRED 1 -#define MPOL_BIND 2 -#define MPOL_INTERLEAVE 3 - -#define MPOL_MAX MPOL_INTERLEAVE - -/* Flags for get_mem_policy */ -#define MPOL_F_NODE (1<<0) /* return next il node or node of address */ - /* Warning: MPOL_F_NODE is unsupported and - * subject to change. Don't use. 
- */ -#define MPOL_F_ADDR (1<<1) /* look up vma using address */ -#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */ - -/* Flags for mbind */ -#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ +static inline bool is_multi_numa_node_system(void) +{ + return is_numa_available() && get_max_numa_node() >= 1; +} #endif /* SELFTEST_KVM_NUMAIF_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 5f389166338c..e58282488beb 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -9,8 +9,22 @@ #include <linux/stringify.h> #include <asm/csr.h> +#include <asm/vdso/processor.h> #include "kvm_util.h" +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_FUNCT3 0x7000 +#define INSN_SHIFT_FUNCT3 12 + +#define INSN_CSR_MASK 0xfff00000 +#define INSN_CSR_SHIFT 20 + +#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3) +#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT) + static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, uint64_t idx, uint64_t size) { @@ -60,7 +74,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); } -struct ex_regs { +struct pt_regs { + unsigned long epc; unsigned long ra; unsigned long sp; unsigned long gp; @@ -92,16 +107,19 @@ struct ex_regs { unsigned long t4; unsigned long t5; unsigned long t6; - unsigned long epc; + /* Supervisor/Machine CSRs */ unsigned long status; + unsigned long badaddr; unsigned long cause; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #define NR_VECTORS 2 
#define NR_EXCEPTIONS 32 #define EC_MASK (NR_EXCEPTIONS - 1) -typedef void(*exception_handler_fn)(struct ex_regs *); +typedef void(*exception_handler_fn)(struct pt_regs *); void vm_init_vector_tables(struct kvm_vm *vm); void vcpu_init_vector_tables(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/include/s390/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h new file mode 100644 index 000000000000..1bf275631cc6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/debug_print.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definition for kernel virtual machines on s390x + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ + +#ifndef SELFTEST_KVM_DEBUG_PRINT_H +#define SELFTEST_KVM_DEBUG_PRINT_H + +#include "asm/ptrace.h" +#include "kvm_util.h" +#include "sie.h" + +static inline void print_hex_bytes(const char *name, u64 addr, size_t len) +{ + u64 pos; + + pr_debug("%s (%p)\n", name, (void *)addr); + pr_debug(" 0/0x00---------|"); + if (len > 8) + pr_debug(" 8/0x08---------|"); + if (len > 16) + pr_debug(" 16/0x10--------|"); + if (len > 24) + pr_debug(" 24/0x18--------|"); + for (pos = 0; pos < len; pos += 8) { + if ((pos % 32) == 0) + pr_debug("\n %3lu 0x%.3lx ", pos, pos); + pr_debug(" %16lx", *((u64 *)(addr + pos))); + } + pr_debug("\n"); +} + +static inline void print_hex(const char *name, u64 addr) +{ + print_hex_bytes(name, addr, 512); +} + +static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", + run->flags, + run->psw_mask, run->psw_addr, + run->exit_reason, exit_reason_str(run->exit_reason)); + pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", + sie_block->psw_mask, sie_block->psw_addr); +} + +static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + print_hex_bytes("run", (u64)run, 0x150); + 
print_hex("sie_block", (u64)sie_block); + print_psw(run, sie_block); +} + +static inline void print_regs(struct kvm_run *run) +{ + struct kvm_sync_regs *sync_regs = &run->s.regs; + + print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); + print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); + print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS); +} + +#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h index b0ed71302722..b0ed71302722 100644 --- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h new file mode 100644 index 000000000000..00a1ced6538b --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/facility.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright IBM Corp. 
2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Get the facility bits with the STFLE instruction + */ + +#ifndef SELFTEST_KVM_FACILITY_H +#define SELFTEST_KVM_FACILITY_H + +#include <linux/bitops.h> + +/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ +#define NB_STFL_DOUBLEWORDS 32 + +extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern bool stfle_flag; + +static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +{ + register unsigned long r0 asm("0") = nb_doublewords - 1; + + asm volatile(" .insn s,0xb2b00000,0(%1)\n" + : "+d" (r0) + : "a" (fac) + : "memory", "cc"); +} + +static inline void setup_facilities(void) +{ + stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); + stfle_flag = true; +} + +static inline bool test_facility(int nr) +{ + if (!stfle_flag) + setup_facilities(); + return test_bit_inv(nr, stfl_doublewords); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h index e43a57d99b56..e43a57d99b56 100644 --- a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h index 255c9b990f4c..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390/processor.h @@ -21,10 +21,21 @@ #define PAGE_PROTECT 0x200 /* HW read-only bit */ #define PAGE_NOEXEC 0x100 /* HW no-execute bit */ +/* Page size definitions */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + /* Is there a portable way to do this? 
*/ static inline void cpu_relax(void) { barrier(); } +/* Get the instruction length */ +static inline int insn_length(unsigned char code) +{ + return ((((int)code + 64) >> 7) + 1) << 1; +} + #endif diff --git a/tools/testing/selftests/kvm/include/s390/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h new file mode 100644 index 000000000000..160acd4a1db9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/sie.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definition for kernel virtual machines on s390. + * + * Adapted copy of struct definition kvm_s390_sie_block from + * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. + * + * Copyright IBM Corp. 2008, 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + * Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef SELFTEST_KVM_SIE_H +#define SELFTEST_KVM_SIE_H + +#include <linux/types.h> + +struct kvm_s390_sie_block { +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + __u32 cpuflags; /* 0x0000 */ + __u32: 1; /* 0x0004 */ + __u32 prefix : 18; + __u32: 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE BIT(0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 
pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE BIT(0) +#define PROG_REQUEST BIT(1) + __u32 prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 
+ __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + __u64 psw_mask; /* 0x0090 */ + __u64 psw_addr; /* 0x0098 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define 
CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h index 8035a872a351..8035a872a351 100644 --- a/tools/testing/selftests/kvm/include/s390x/ucall.h +++ b/tools/testing/selftests/kvm/include/s390/ucall.h diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..b4872ba8ed12 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_TEST_UTIL_H #define SELFTEST_KVM_TEST_UTIL_H +#include <setjmp.h> +#include <signal.h> #include <stdlib.h> #include <stdarg.h> #include <stdbool.h> @@ -22,7 +24,7 @@ #define msecs_to_usecs(msec) ((msec) * 1000ULL) -static inline int _no_printf(const char *format, ...) { return 0; } +static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; } #ifdef DEBUG #define pr_debug(...) 
printf(__VA_ARGS__) @@ -78,6 +80,23 @@ do { \ __builtin_unreachable(); \ } while (0) +extern sigjmp_buf expect_sigbus_jmpbuf; +void expect_sigbus_handler(int signum); + +#define TEST_EXPECT_SIGBUS(action) \ +do { \ + struct sigaction sa_old, sa_new = { \ + .sa_handler = expect_sigbus_handler, \ + }; \ + \ + sigaction(SIGBUS, &sa_new, &sa_old); \ + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \ + action; \ + TEST_FAIL("'%s' should have triggered SIGBUS", #action); \ + } \ + sigaction(SIGBUS, &sa_old, NULL); \ +} while (0) + size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); @@ -153,6 +172,7 @@ bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void); /* * Whether or not the given source type is shared memory (as opposed to diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index bed316fdecd5..80fe9f69b38d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/apic.h - * * Copyright (C) 2021, Google LLC. 
*/ @@ -11,6 +9,7 @@ #include <stdint.h> #include "processor.h" +#include "ucall_common.h" #define APIC_DEFAULT_GPA 0xfee00000ULL @@ -60,6 +59,14 @@ #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 #define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 void apic_disable(void); void xapic_enable(void); @@ -85,9 +92,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg) return rdmsr(APIC_BASE_MSR + (reg >> 4)); } +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +{ + return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); +} + static inline void x2apic_write_reg(unsigned int reg, uint64_t value) { - wrmsr(APIC_BASE_MSR + (reg >> 4), value); + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", + fault, APIC_BASE_MSR + (reg >> 4), value); } +static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(fault == GP_VECTOR, + "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", + APIC_BASE_MSR + (reg >> 4), value, fault); +} + + #endif /* SELFTEST_KVM_APIC_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 901caf0e0939..5a74bb30e2f8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/evmcs.h - * * Copyright (C) 2018, Red Hat, Inc. 
- * */ #ifndef SELFTEST_KVM_EVMCS_H diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index fa65b908b13e..f13e532be240 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/hyperv.h - * * Copyright (C) 2021, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_HYPERV_H @@ -186,6 +183,18 @@ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) +/* HYPERV_CPUID_NESTED_FEATURES.EAX */ +#define HV_X64_NESTED_DIRECT_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) +#define HV_X64_NESTED_MSR_BITMAP \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) + +/* HYPERV_CPUID_NESTED_FEATURES.EBX */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) + /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ #define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) @@ -343,4 +352,10 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index 972bb1c4ab4c..972bb1c4ab4c 100644 --- 
a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h index 6119321f3f5d..295f2d554754 100644 --- a/tools/testing/selftests/kvm/include/x86_64/mce.h +++ b/tools/testing/selftests/kvm/include/x86/mce.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/mce.h - * * Copyright (C) 2022, Google LLC. */ diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 3c10c4dc0ae8..72575eadb63a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -5,8 +5,11 @@ #ifndef SELFTEST_KVM_PMU_H #define SELFTEST_KVM_PMU_H +#include <stdbool.h> #include <stdint.h> +#include <linux/bits.h> + #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 /* @@ -61,6 +64,11 @@ #define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) #define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) #define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) +#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02) +#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00) +#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01) +#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02) +#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01) #define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) #define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) @@ -80,6 +88,11 @@ enum intel_pmu_architectural_events { INTEL_ARCH_BRANCHES_RETIRED_INDEX, INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, INTEL_ARCH_TOPDOWN_SLOTS_INDEX, + INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX, + INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_RETIRING_INDEX, + INTEL_ARCH_LBR_INSERTS_INDEX, NR_INTEL_ARCH_EVENTS, }; @@ -94,4 +107,17 @@ enum amd_pmu_zen_events { extern 
const uint64_t intel_pmu_arch_events[]; extern const uint64_t amd_pmu_zen_events[]; +enum pmu_errata { + INSTRUCTIONS_RETIRED_OVERCOUNT, + BRANCHES_RETIRED_OVERCOUNT, +}; +extern uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void); + +static inline bool this_pmu_has_errata(enum pmu_errata errata) +{ + return pmu_errata_mask & BIT_ULL(errata); +} + #endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index c0c7c1fe93f9..57d62a425109 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/processor.h - * * Copyright (C) 2018, Google LLC. */ @@ -23,12 +21,21 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; +extern uint64_t guest_tsc_khz; + +#ifndef MAX_NR_CPUID_ENTRIES +#define MAX_NR_CPUID_ENTRIES 100 +#endif + +#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" #define NMI_VECTOR 0x02 +const char *ex_str(int vector); + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -178,6 +185,9 @@ struct kvm_x86_cpu_feature { * Extended Leafs, a.k.a. 
AMD defined */ #define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23) +#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24) +#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28) #define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) #define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) #define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) @@ -192,8 +202,12 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) #define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) +#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4) +#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0) +#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2) /* * KVM defined paravirt features. 
@@ -253,7 +267,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) #define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) -#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) #define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) @@ -280,6 +294,8 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) +#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3) +#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15) #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) @@ -318,6 +334,11 @@ struct kvm_x86_pmu_feature { #define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) #define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) #define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) +#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8) +#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9) +#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10) +#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11) +#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12) #define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) #define 
X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) @@ -566,6 +587,11 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline void set_idt(const struct desc_ptr *idt_desc) +{ + __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); +} + static inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -816,6 +842,23 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } +static inline void udelay(unsigned long usec) +{ + uint64_t start, now, cycles; + + GUEST_ASSERT(guest_tsc_khz); + cycles = guest_tsc_khz / 1000 * usec; + + /* + * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is + * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits. + */ + start = rdtsc(); + do { + now = rdtsc(); + } while (now - start < cycles); +} + #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ @@ -890,8 +933,6 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); static inline uint32_t kvm_cpu_fms(void) { @@ -991,12 +1032,20 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) } void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, uint32_t index) { + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); + + vcpu_get_cpuid(vcpu); + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, function, index); } @@ 
-1017,7 +1066,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) return r; /* On success, refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); return 0; } @@ -1027,7 +1076,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); /* Refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); } void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, @@ -1108,7 +1157,6 @@ do { \ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -1138,6 +1186,12 @@ struct idt_entry { void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be + * used to signal "no exception". + */ +#define KVM_MAGIC_DE_VECTOR 0xff + /* If a toddler were to say "abracadabra". 
*/ #define KVM_EXCEPTION_MAGIC 0xabacadabaULL @@ -1211,7 +1265,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, uint64_t ign_error_code; \ uint8_t vector; \ \ - asm volatile(KVM_ASM_SAFE(insn) \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ : inputs \ : KVM_ASM_SAFE_CLOBBERS); \ @@ -1273,6 +1327,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) bool kvm_is_tdp_enabled(void); +static inline bool get_kvm_intel_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_intel", param); +} + +static inline bool get_kvm_amd_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_amd", param); +} + +static inline int get_kvm_intel_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_intel", param); +} + +static inline int get_kvm_amd_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_amd", param); +} + static inline bool kvm_is_pmu_enabled(void) { return get_kvm_param_bool("enable_pmu"); @@ -1283,6 +1357,16 @@ static inline bool kvm_is_forced_emulation_enabled(void) return !!get_kvm_param_integer("force_emulation_prefix"); } +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + +static inline bool kvm_is_ignore_msrs(void) +{ + return get_kvm_param_bool("ignore_msrs"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); @@ -1306,6 +1390,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, GUEST_ASSERT(!ret); } +/* + * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's + * intended to be a wake event arrives *after* HLT is executed. Modern CPUs, + * except for a few oddballs that KVM is unlikely to run on, block IRQs for one + * instruction after STI, *if* RFLAGS.IF=0 before STI. 
Note, Intel CPUs may + * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too. + */ +static inline void safe_halt(void) +{ + asm volatile("sti; hlt"); +} + +/* + * Enable interrupts and ensure that interrupts are evaluated upon return from + * this function, i.e. execute a nop to consume the STI interrupt shadow. + */ +static inline void sti_nop(void) +{ + asm volatile ("sti; nop"); +} + +/* + * Enable interrupts for one instruction (nop), to allow the CPU to process all + * interrupts that are already pending. + */ +static inline void sti_nop_cli(void) +{ + asm volatile ("sti; nop; cli"); +} + +static inline void sti(void) +{ + asm volatile("sti"); +} + +static inline void cli(void) +{ + asm volatile ("cli"); +} + void __vm_xsave_require_permission(uint64_t xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ @@ -1317,7 +1441,7 @@ enum pg_level { PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, - PG_LEVEL_NUM + PG_LEVEL_256T }; #define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 82c11c81a956..008b4169f5e2 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -25,19 +25,51 @@ enum sev_guest_state { #define SEV_POLICY_NO_DBG (1UL << 0) #define SEV_POLICY_ES (1UL << 2) +#define SNP_POLICY_SMT (1ULL << 16) +#define SNP_POLICY_RSVD_MBO (1ULL << 17) +#define SNP_POLICY_DBG (1ULL << 19) + #define GHCB_MSR_TERM_REQ 0x100 +static inline bool is_sev_snp_vm(struct kvm_vm *vm) +{ + return vm->type == KVM_X86_SNP_VM; +} + +static inline bool is_sev_es_vm(struct kvm_vm *vm) +{ + return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM; +} + +static inline bool is_sev_vm(struct kvm_vm *vm) +{ + return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM; +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t 
*measurement); void sev_vm_launch_finish(struct kvm_vm *vm); +void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy); +void snp_vm_launch_update(struct kvm_vm *vm); +void snp_vm_launch_finish(struct kvm_vm *vm); struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu); -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); +void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); /* + * A SEV-SNP VM requires the policy reserved bit to always be set. + * The SMT policy bit is also required to be set based on SMT being + * available and active on the system. + */ +static inline u64 snp_default_policy(void) +{ + return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0); +} + +/* * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long" * instead of a proper struct. The size of the parameter is embedded in the * ioctl number, i.e. is ABI and thus immutable. 
Hack around the mess by @@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); void sev_vm_init(struct kvm_vm *vm); void sev_es_vm_init(struct kvm_vm *vm); +void snp_vm_init(struct kvm_vm *vm); + +static inline void vmgexit(void) +{ + __asm__ __volatile__("rep; vmmcall"); +} static inline void sev_register_encrypted_memory(struct kvm_vm *vm, struct userspace_mem_region *region) @@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); } +static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, + uint64_t hva, uint64_t size, uint8_t type) +{ + struct kvm_sev_snp_launch_update update_data = { + .uaddr = hva, + .gfn_start = gpa >> PAGE_SHIFT, + .len = size, + .type = type, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data); +} + #endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h index 4803e1056055..29cffd0a9181 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86/svm.h @@ -1,10 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/svm.h - * This is a copy of arch/x86/include/asm/svm.h - * - */ - #ifndef SELFTEST_KVM_SVM_H #define SELFTEST_KVM_SVM_H diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index 044f0f872ba9..b74c6dcddcbd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -1,8 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/svm_utils.h - * Header for nested SVM testing - * * Copyright (C) 2020, Red Hat, Inc. 
*/ diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h index d3825dcc3cd9..d3825dcc3cd9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/ucall.h +++ b/tools/testing/selftests/kvm/include/x86/ucall.h diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 5f0c0a29c556..96e2b4c630a9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/vmx.h - * * Copyright (C) 2018, Google LLC. */ @@ -570,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..5d7590d01868 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. 
so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the same fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. + */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. */ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. 
+ */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + struct kvm_vcpu *unused; + int r, i; + + TEST_REQUIRE(kvm_arch_has_default_irqchip()); + + /* + * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also + * create an unused vCPU as certain architectures (like arm64) need to + * complete IRQ chip initialization after all possible vCPUs for a VM + * have been created. + */ + vm1 = vm_create_with_one_vcpu(&unused, NULL); + vm2 = vm_create_with_one_vcpu(&unused, NULL); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. 
+ */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index f02355c3c4c2..b7dbde9c0843 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -239,14 +239,14 @@ int main(int argc, char *argv[]) * single stats file works and doesn't cause explosions. */ vm_stats_fds = vm_get_stats_fd(vms[i]); - stats_test(dup(vm_stats_fds)); + stats_test(kvm_dup(vm_stats_fds)); /* Verify userspace can instantiate multiple stats files. 
*/ stats_test(vm_get_stats_fd(vms[i])); for (j = 0; j < max_vcpu; ++j) { vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]); - stats_test(dup(vcpu_stats_fds[j])); + stats_test(kvm_dup(vcpu_stats_fds[j])); stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j])); } diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index c78f34699f73..c5310736ed06 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include "test_util.h" @@ -39,36 +38,11 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - /* - * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + - * an arbitrary number for everything else. - */ - int nr_fds_wanted = kvm_max_vcpus + 100; - struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); - /* - * Check that we're allowed to open nr_fds_wanted file descriptors and - * try raising the limits if needed. - */ - TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); - - if (rl.rlim_cur < nr_fds_wanted) { - rl.rlim_cur = nr_fds_wanted; - if (rl.rlim_max < nr_fds_wanted) { - int old_rlim_max = rl.rlim_max; - rl.rlim_max = nr_fds_wanted; - - int r = setrlimit(RLIMIT_NOFILE, &rl); - __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", - old_rlim_max, nr_fds_wanted); - } else { - TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); - } - } + kvm_set_files_rlimit(kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. 
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 7abbf8866512..b023868fe0b8 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge) GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_irq_set_config(intid, is_edge); } + +void gic_irq_set_group(unsigned int intid, bool group) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_group(intid, group); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index d24e9ecc96c6..b6a7e30c3eb1 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -25,6 +25,7 @@ struct gic_common_ops { void (*gic_irq_clear_pending)(uint32_t intid); bool (*gic_irq_get_pending)(uint32_t intid); void (*gic_irq_set_config)(uint32_t intid, bool is_edge); + void (*gic_irq_set_group)(uint32_t intid, bool group); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 66d05506f78b..50754a27f493 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base) } } +static void gicv3_set_group(uint32_t intid, bool grp) +{ + uint32_t cpu_or_dist; + uint32_t val; + + cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? 
DIST_BIT : guest_get_vcpuid(); + val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); + if (grp) + val |= BIT(intid % 32); + else + val &= ~BIT(intid % 32); + gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val); +} + static void gicv3_cpu_init(unsigned int cpu) { volatile void *sgi_base; unsigned int i; volatile void *redist_base_cpu; + u64 typer; GUEST_ASSERT(cpu < gicv3_data.nr_cpus); redist_base_cpu = gicr_base_cpu(cpu); sgi_base = sgi_base_from_redist(redist_base_cpu); + /* Verify assumption that GICR_TYPER.Processor_number == cpu */ + typer = readq_relaxed(redist_base_cpu + GICR_TYPER); + GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu); + gicv3_enable_redist(redist_base_cpu); /* @@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu) /* Set a default priority threshold */ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); + /* Disable Group-0 interrupts */ + write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1); /* Enable non-secure Group-1 interrupts */ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); } @@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_clear_pending = gicv3_irq_clear_pending, .gic_irq_get_pending = gicv3_irq_get_pending, .gic_irq_set_config = gicv3_irq_set_config, + .gic_irq_set_group = gicv3_set_group, }; void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..7f9fdcf42ae6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -15,6 +15,8 @@ #include "gic_v3.h" #include "processor.h" +#define GITS_COLLECTION_TARGET_SHIFT 16 + static u64 its_read_u64(unsigned long offset) { return readq_relaxed(GITS_BASE_GVA + offset); @@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], 
col, 15, 0); } +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + #define GITS_CMDQ_POLL_ITERATIONS 0 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) @@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val its_encode_cmd(&cmd, GITS_CMD_MAPC); its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); its_encode_valid(&cmd, valid); its_send_cmd(cmdq_base, &cmd); @@ -246,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id) its_send_cmd(cmdq_base, &cmd); } + +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_SYNC); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S index 0e443eadfac6..0e443eadfac6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S +++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 0ac7cc89f38c..d46e4b13b92c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -12,6 +12,7 @@ #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" +#include "vgic.h" #include <linux/bitfield.h> #include <linux/sizes.h> @@ -72,13 +73,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= 
~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +91,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +129,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +149,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, 
pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +172,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -177,7 +186,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) { uint64_t *ptep; @@ -187,17 +196,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -215,6 +230,11 @@ unmapped_gva: exit(EXIT_FAILURE); } +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva); @@ -258,55 +278,73 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +bool vm_supports_el2(struct kvm_vm *vm) +{ + const char *value = getenv("NV"); + + if (value && *value == '0') + return false; + + return vm_check_cap(vm, KVM_CAP_ARM_EL2) 
&& vm->arch.has_gic; +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init preferred = {}; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + if (vm_supports_el2(vm)) + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + + *init = preferred; +} + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; uint64_t sctlr_el1, tcr_el1, ttbr0_el1; - if (!init) + if (!init) { + kvm_get_default_vcpu_target(vm, &default_init); init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; } vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + vcpu->init = *init; /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case 
VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,49 +357,55 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + 
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); + + if (!vcpu_has_el2(vcpu)) + return; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { uint64_t pstate, pc; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); @@ -387,7 +431,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size); return vcpu; } @@ -450,14 +494,14 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) } struct handlers { - handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; + handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; }; void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -469,7 +513,7 @@ void route_exception(struct ex_regs *regs, int vector) switch (vector) { case VECTOR_SYNC_CURRENT: case VECTOR_SYNC_LOWER_64: - ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(read_sysreg(esr_el1)); valid_ec = true; break; case VECTOR_IRQ_CURRENT: @@ -508,7 +552,7 @@ void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, assert(VECTOR_IS_SYNC(vector)); assert(vector < VECTOR_NUM); - assert(ec < ESR_EC_NUM); + assert(ec <= ESR_ELx_EC_MAX); 
handlers->exception_handlers[vector][ec] = handler; } @@ -565,15 +609,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, ID_AA64MMFR0_EL1_TGRAN64_IMP); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, ID_AA64MMFR0_EL1_TGRAN16_52_BIT); @@ -639,3 +683,50 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm) sparsebit_set_num(vm->vpages_valid, 0, (1ULL << vm->va_bits) >> vm->page_shift); } + +/* Helper to call wfi instruction. 
*/ +void wfi(void) +{ + asm volatile("wfi"); +} + +static bool request_mte; +static bool request_vgic = true; + +void test_wants_mte(void) +{ + request_mte = true; +} + +void test_disable_default_vgic(void) +{ + request_vgic = false; +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + + if (request_vgic && kvm_supports_vgic_v3()) { + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); + vm->arch.has_gic = true; + } +} + +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + __vgic_v3_init(vm->arch.gic_fd); +} + +void kvm_arch_vm_release(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + close(vm->arch.gic_fd); +} + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c index a076e780be5d..a076e780be5d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c +++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index ddab0ce89d4d..ddab0ce89d4d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..d0f7bd0984b8 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -15,6 +15,17 @@ #include "gic.h" #include "gic_v3.h" +bool kvm_supports_vgic_v3(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + kvm_vm_free(vm); + + return !r; +} + /* * vGIC-v3 default host setup * @@ -30,24 +41,11 @@ * redistributor regions of the guest. 
Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; uint64_t attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. - */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", - nr_vcpus, nr_vcpus_created); + unsigned int nr_gic_pages; /* Distributor setup */ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); @@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); @@ -73,10 +68,39 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + return gic_fd; +} + +void __vgic_v3_init(int fd) +{ + kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); +} - return gic_fd; +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +{ + unsigned int nr_vcpus_created = 0; + struct list_head *iter; + int fd; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. 
+ */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); + if (fd < 0) + return fd; + + __vgic_v3_init(fd); + return fd; } /* should only work for level sensitive interrupts */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index ad00e4761886..8279b6ced8d2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -12,6 +12,7 @@ #include <assert.h> #include <sched.h> #include <sys/mman.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -21,18 +22,31 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; +static uint32_t last_guest_seed; -static int vcpu_mmap_sz(void); +static size_t vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" 
: enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -46,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -62,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, @@ -78,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param, return bytes_read; } -static int get_module_param_integer(const char *module_name, const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { /* * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the @@ -102,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param) return atoi_paranoid(value); } -static bool get_module_param_bool(const char *module_name, const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { char value; ssize_t r; @@ -118,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param) TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } -bool get_kvm_param_bool(const char *param) -{ - return get_module_param_bool("kvm", param); -} - -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); -} - -bool get_kvm_amd_param_bool(const char *param) -{ - return get_module_param_bool("kvm_amd", param); -} - -int get_kvm_param_integer(const char *param) -{ - 
return get_module_param_integer("kvm", param); -} - -int get_kvm_intel_param_integer(const char *param) -{ - return get_module_param_integer("kvm_intel", param); -} - -int get_kvm_amd_param_integer(const char *param) -{ - return get_module_param_integer("kvm_amd", param); -} - /* * Capability * @@ -195,6 +182,11 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } const char *vm_guest_mode_string(uint32_t i) @@ -209,12 +201,13 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", - [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages", [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, @@ -235,12 +228,13 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, - [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 
14 }, [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 }, [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, @@ -312,27 +306,30 @@ struct kvm_vm *____vm_create(struct vm_shape shape) case VM_MODE_P36V48_16K: vm->pgtable_levels = 4; break; + case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: #ifdef __x86_64__ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); kvm_init_vm_address_properties(vm); - /* - * Ignore KVM support for 5-level paging (vm->va_bits == 57), - * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this mode (48-bit virtual address space). - */ - TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, - "Linear address width (%d bits) not supported", - vm->va_bits); + pr_debug("Guest physical address width detected: %d\n", vm->pa_bits); - vm->pgtable_levels = 4; - vm->va_bits = 48; + pr_debug("Guest virtual address width detected: %d\n", + vm->va_bits); + + if (vm->va_bits == 57) { + vm->pgtable_levels = 5; + } else { + TEST_ASSERT(vm->va_bits == 48, + "Unexpected guest virtual address width: %d", + vm->va_bits); + vm->pgtable_levels = 4; + } #else - TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); + TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: @@ -405,6 +402,47 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } +void kvm_set_files_rlimit(uint32_t nr_vcpus) +{ + /* + * Each vCPU will open two file descriptors: the vCPU itself and the + * vCPU's binary stats file descriptor. Add an arbitrary amount of + * buffer for all other files a test may open. 
+ */ + int nr_fds_wanted = nr_vcpus * 2 + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + +static bool is_guest_memfd_required(struct vm_shape shape) +{ +#ifdef __x86_64__ + return shape.type == KVM_X86_SNP_VM; +#else + return false; +#endif +} + struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { @@ -412,14 +450,24 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; - int i; + int i, flags; + + kvm_set_files_rlimit(nr_runnable_vcpus); pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, vm_guest_mode_string(shape.mode), shape.type, nr_pages); vm = ____vm_create(shape); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + /* + * Force GUEST_MEMFD for the primary memory region if necessary, e.g. + * for CoCo VMs that require GUEST_MEMFD backed private memory. 
+ */ + flags = 0; + if (is_guest_memfd_required(shape)) + flags |= KVM_MEM_GUEST_MEMFD; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags); for (i = 0; i < NR_MEM_REGIONS; i++) vm->memslots[i] = 0; @@ -434,11 +482,14 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - pr_info("Random seed: 0x%x\n", guest_random_seed); + if (guest_random_seed != last_guest_seed) { + pr_info("Random seed: 0x%x\n", guest_random_seed); + last_guest_seed = guest_random_seed; + } guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); return vm; } @@ -476,6 +527,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -541,15 +593,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -603,7 +654,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. 
*/ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -653,6 +704,20 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } +static void kvm_stats_release(struct kvm_binary_stats *stats) +{ + if (stats->fd < 0) + return; + + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } + + kvm_close(stats->fd); + stats->fd = -1; +} + __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -672,19 +737,15 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) */ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - int ret; - if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); - ret = close(vcpu->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vcpu->fd); + kvm_stats_release(&vcpu->stats); list_del(&vcpu->list); @@ -695,41 +756,32 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) void kvm_vm_release(struct kvm_vm *vmp) { struct kvm_vcpu *vcpu, *tmp; - int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) vm_vcpu_rm(vmp, vcpu); - ret = close(vmp->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vmp->fd); + kvm_close(vmp->kvm_fd); + + /* Free cached stats metadata and close FD */ + kvm_stats_release(&vmp->stats); - ret = close(vmp->kvm_fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_arch_vm_release(vmp); } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region, - bool unlink) + struct userspace_mem_region *region) { - int ret; - - if (unlink) { - rb_erase(®ion->gpa_node, 
&vm->regions.gpa_tree); - rb_erase(®ion->hva_node, &vm->regions.hva_tree); - hash_del(®ion->slot_node); - } - - region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); + rb_erase(®ion->hva_node, &vm->regions.hva_tree); + hash_del(®ion->slot_node); sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } if (region->region.guest_memfd >= 0) @@ -750,15 +802,9 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) - __vm_mem_region_delete(vmp, region, false); + __vm_mem_region_delete(vmp, region); /* Free sparsebit arrays. 
*/ sparsebit_free(&vmp->vpages_valid); @@ -773,7 +819,7 @@ void kvm_vm_free(struct kvm_vm *vmp) int kvm_memfd_alloc(size_t size, bool hugepages) { int memfd_flags = MFD_CLOEXEC; - int fd, r; + int fd; if (hugepages) memfd_flags |= MFD_HUGETLB; @@ -781,85 +827,12 @@ int kvm_memfd_alloc(size_t size, bool hugepages) fd = memfd_create("kvm_selftest", memfd_flags); TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); - r = ftruncate(fd, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); - - r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_ftruncate(fd, size); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); return fd; } -/* - * Memory Compare, host virtual to guest virtual - * - * Input Args: - * hva - Starting host virtual address - * vm - Virtual Machine - * gva - Starting guest virtual address - * len - number of bytes to compare - * - * Output Args: None - * - * Input/Output Args: None - * - * Return: - * Returns 0 if the bytes starting at hva for a length of len - * are equal the guest virtual bytes starting at gva. Returns - * a value < 0, if bytes at hva are less than those at gva. - * Otherwise a value > 0 is returned. - * - * Compares the bytes starting at the host virtual address hva, for - * a length of len, to the guest bytes starting at the guest virtual - * address given by gva. - */ -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) -{ - size_t amt; - - /* - * Compare a batch of bytes until either a match is found - * or all the bytes have been compared. - */ - for (uintptr_t offset = 0; offset < len; offset += amt) { - uintptr_t ptr1 = (uintptr_t)hva + offset; - - /* - * Determine host address for guest virtual address - * at offset. - */ - uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - - /* - * Determine amount to compare on this pass. 
- * Don't allow the comparsion to cross a page boundary. - */ - amt = len - offset; - if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr1 % vm->page_size); - if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr2 % vm->page_size); - - assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); - assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - - /* - * Perform the comparison. If there is a difference - * return that result to the caller, otherwise need - * to continue on looking for a mismatch. - */ - int ret = memcmp((void *)ptr1, (void *)ptr2, amt); - if (ret != 0) - return ret; - } - - /* - * No mismatch found. Let the caller know the two memory - * areas are equal. - */ - return 0; -} - static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, struct userspace_mem_region *region) { @@ -972,8 +945,8 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) + uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, + int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; @@ -987,30 +960,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, "Number of guest pages is not compatible with the host. 
" "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); - TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" - " guest_paddr: 0x%lx vm->page_size: 0x%x", - guest_paddr, vm->page_size); - TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + " gpa: 0x%lx vm->page_size: 0x%x", + gpa, vm->page_size); + TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1) <= vm->max_gfn, "Physical range beyond maximum " "supported physical address,\n" - " guest_paddr: 0x%lx npages: 0x%lx\n" + " gpa: 0x%lx npages: 0x%lx\n" " vm->max_gfn: 0x%lx vm->page_size: 0x%x", - guest_paddr, npages, vm->max_gfn, vm->page_size); + gpa, npages, vm->max_gfn, vm->page_size); /* * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); + vm, gpa, (gpa + npages * vm->page_size) - 1); if (region != NULL) TEST_FAIL("overlapping userspace_mem_region already " "exists\n" - " requested guest_paddr: 0x%lx npages: 0x%lx " - "page_size: 0x%x\n" - " existing guest_paddr: 0x%lx size: 0x%lx", - guest_paddr, npages, vm->page_size, + " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n" + " existing gpa: 0x%lx size: 0x%lx", + gpa, npages, vm->page_size, (uint64_t) region->region.guest_phys_addr, (uint64_t) region->region.memory_size); @@ -1024,8 +996,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, "already exists.\n" " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", - slot, guest_paddr, npages, - region->region.slot, + slot, gpa, npages, region->region.slot, (uint64_t) region->region.guest_phys_addr, (uint64_t) region->region.memory_size); } @@ -1051,7 +1022,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (src_type == 
VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -1062,12 +1033,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1102,8 +1070,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, * needing to track if the fd is owned by the framework * or by the caller. 
*/ - guest_memfd = dup(guest_memfd); - TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + guest_memfd = kvm_dup(guest_memfd); } region->region.guest_memfd = guest_memfd; @@ -1115,20 +1082,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->unused_phy_pages = sparsebit_alloc(); if (vm_arch_has_protected_memory(vm)) region->protected_phy_pages = sparsebit_alloc(); - sparsebit_set_num(region->unused_phy_pages, - guest_paddr >> vm->page_shift, npages); + sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages); region->region.slot = slot; region->region.flags = flags; - region->region.guest_phys_addr = guest_paddr; + region->region.guest_phys_addr = gpa; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", - ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size, + " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d", + ret, errno, slot, flags, gpa, region->region.memory_size, region->region.guest_memfd); /* Add to quick lookup data structures */ @@ -1138,12 +1103,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, /* If shared memory, create an alias. 
*/ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1152,10 +1115,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, - uint64_t npages, uint32_t flags) + uint64_t gpa, uint32_t slot, uint64_t npages, + uint32_t flags) { - vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); + vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); } /* @@ -1221,6 +1184,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } +void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) +{ + struct userspace_mem_region *region = memslot2region(vm, slot); + struct kvm_userspace_memory_region2 tmp = region->region; + + tmp.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); +} + /* * VM Memory Region Move * @@ -1266,7 +1239,12 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot), true); + struct userspace_mem_region *region = memslot2region(vm, slot); + + region->region.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + __vm_mem_region_delete(vm, region); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, @@ -1298,14 +1276,14 @@ 
void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1346,12 +1324,15 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vcpu->stats.fd = vcpu_get_stats_fd(vcpu); + else + vcpu->stats.fd = -1; /* Add to linked-list of VCPUs. 
*/ list_add(&vcpu->list, &vm->vcpus); @@ -1468,8 +1449,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, pages--, vaddr += vm->page_size, paddr += vm->page_size) { virt_pg_map(vm, vaddr, paddr); - - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; @@ -1583,7 +1562,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, while (npages--) { virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); vaddr += page_size; paddr += page_size; @@ -1702,7 +1680,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. */ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } @@ -1715,7 +1704,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu) rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vcpu); + if (!rc) + assert_on_unhandled_exception(vcpu); return rc; } @@ -1781,9 +1771,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); @@ 
-2024,9 +2013,9 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), + KVM_EXIT_STRING(ARM_SEA), }; /* @@ -2264,49 +2253,42 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, desc->name, size, ret); } -/* - * Read the data of the named stat - * - * Input Args: - * vm - the VM for which the stat should be read - * stat_name - the name of the stat to read - * max_elements - the maximum number of 8-byte values to read into data - * - * Output Args: - * data - the buffer into which stat data should be read - * - * Read the data values of a specified stat from the binary stats interface. - */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements) +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_fd) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); - - break; + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); + return; } + + 
TEST_FAIL("Unable to find stat '%s'", name); +} + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ } -__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ +} + +__weak void kvm_arch_vm_release(struct kvm_vm *vm) { } @@ -2314,12 +2296,37 @@ __weak void kvm_selftest_arch_init(void) { } +static void report_unexpected_signal(int signum) +{ +#define KVM_CASE_SIGNUM(sig) \ + case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum) + + switch (signum) { + KVM_CASE_SIGNUM(SIGBUS); + KVM_CASE_SIGNUM(SIGSEGV); + KVM_CASE_SIGNUM(SIGILL); + KVM_CASE_SIGNUM(SIGFPE); + default: + TEST_FAIL("Unexpected signal %d\n", signum); + } +} + void __attribute((constructor)) kvm_selftest_init(void) { + struct sigaction sig_sa = { + .sa_handler = report_unexpected_signal, + }; + /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); - guest_random_seed = random(); + sigaction(SIGBUS, &sig_sa, NULL); + sigaction(SIGSEGV, &sig_sa, NULL); + sigaction(SIGILL, &sig_sa, NULL); + sigaction(SIGFPE, &sig_sa, NULL); + + guest_random_seed = last_guest_seed = random(); + pr_info("Random seed: 0x%x\n", guest_random_seed); kvm_selftest_arch_init(); } @@ -2338,3 +2345,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) pg = paddr >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S new file mode 100644 index 000000000000..3f1e4b67c5ae --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "processor.h" + +/* address of refill exception should be 4K aligned */ +.balign 4096 +.global handle_tlb_refill +handle_tlb_refill: + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, 
LOONGARCH_CSR_PGD + lddir t0, t0, 3 + lddir t0, t0, 1 + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn + + /* + * save and restore all gprs except base register, + * and default value of base register is sp ($r3). + */ +.macro save_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\n, \base, 8 * \n + .endr +.endm + +.macro restore_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\n, \base, 8 * \n + .endr +.endm + +/* address of general exception should be 4K aligned */ +.balign 4096 +.global handle_exception +handle_exception: + csrwr sp, LOONGARCH_CSR_KS0 + csrrd sp, LOONGARCH_CSR_KS1 + addi.d sp, sp, -EXREGS_SIZE + + save_gprs sp + /* save sp register to stack */ + csrrd t0, LOONGARCH_CSR_KS0 + st.d t0, sp, 3 * 8 + + csrrd t0, LOONGARCH_CSR_ERA + st.d t0, sp, PC_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_ESTAT + st.d t0, sp, ESTAT_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_BADV + st.d t0, sp, BADV_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_PRMD + st.d t0, sp, PRMD_OFFSET_EXREGS + + or a0, sp, zero + bl route_exception + ld.d t0, sp, PC_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_ERA + ld.d t0, sp, PRMD_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_PRMD + restore_gprs sp + csrrd sp, LOONGARCH_CSR_KS0 + ertn diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c new file mode 100644 index 000000000000..07c103369ddb --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <linux/compiler.h> + +#include <asm/kvm.h> +#include "kvm_util.h" +#include "processor.h" +#include "ucall_common.h" + +#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 +#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 + +static vm_paddr_t invalid_pgtable[4]; +static vm_vaddr_t exception_handlers; + 
+static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +{ + unsigned int shift; + uint64_t mask; + + shift = level * (vm->page_shift - 3) + vm->page_shift; + mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> shift) & mask; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + return entry & ~((0x1UL << vm->page_shift) - 1); +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child) +{ + uint64_t *ptep; + int i, ptrs_per_pte; + + ptep = addr_gpa2hva(vm, table); + ptrs_per_pte = 1 << (vm->page_shift - 3); + for (i = 0; i < ptrs_per_pte; i++) + WRITE_ONCE(*(ptep + i), child); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + int i; + vm_paddr_t child, table; + + if (vm->pgd_created) + return; + + child = table = 0; + for (i = 0; i < vm->pgtable_levels; i++) { + invalid_pgtable[i] = child; + table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, + vm->memslots[MEM_REGION_PT]); + TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i); + virt_set_pgtable(vm, table, child); + child = table; + } + vm->pgd = table; + vm->pgd_created = true; +} + +static int virt_pte_none(uint64_t *ptep, int level) +{ + return *ptep == invalid_pgtable[level]; +} + +static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc) +{ + int level; + uint64_t *ptep; + vm_paddr_t child; + + if (!vm->pgd_created) + goto unmapped_gva; + + child = vm->pgd; + level = vm->pgtable_levels - 1; + while (level > 0) { + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + if (virt_pte_none(ptep, level)) { + if (alloc) { + child = vm_alloc_page_table(vm); + virt_set_pgtable(vm, child, invalid_pgtable[level - 1]); + WRITE_ONCE(*ptep, child); + } else + goto unmapped_gva; + + } else + child = pte_addr(vm, *ptep); + level--; + } + + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, 
gva, level) * 8; + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + ptep = virt_populate_pte(vm, gva, 0); + TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint32_t prot_bits; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = virt_populate_pte(vm, vaddr, 1); + prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; + WRITE_ONCE(*ptep, paddr | prot_bits); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ + uint64_t pte, *ptep; + static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (virt_pte_none(ptep, level)) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", + indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--); + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level; + + if (!vm->pgd_created) + return; + + level = 
vm->pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->pgd, level); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)", + uc.args[0], uc.args[1], uc.args[2]); +} + +void route_exception(struct ex_regs *regs) +{ + int vector; + unsigned long pc, estat, badv; + struct handlers *handlers; + + handlers = (struct handlers *)exception_handlers; + vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; + if (handlers && handlers->exception_handlers[vector]) + return handlers->exception_handlers[vector](regs); + + pc = regs->pc; + badv = regs->badv; + estat = regs->estat; + ucall(UCALL_UNHANDLED, 3, pc, estat, badv); + while (1) ; +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + void *addr; + + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + + addr = addr_gva2hva(vm, vm->handlers); + memset(addr, 0, vm->page_size); + exception_handlers = vm->handlers; + sync_global_to_guest(vm, exception_handlers); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector] = handler; +} + +uint32_t guest_get_vcpuid(void) +{ + return csr_read(LOONGARCH_CSR_CPUID); +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 
+{ + int i; + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + "num: %u\n", num); + + vcpu_regs_get(vcpu, &regs); + + va_start(ap, num); + for (i = 0; i < num; i++) + regs.gpr[i + 4] = va_arg(ap, uint64_t); + va_end(ap); + + vcpu_regs_set(vcpu, &regs); +} + +static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + __vcpu_set_reg(vcpu, id, val); +} + +static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_get_reg(vcpu, csrid, addr); +} + +static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, csrid, val); +} + +static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int width; + unsigned long val; + struct kvm_vm *vm = vcpu->vm; + + switch (vm->mode) { + case VM_MODE_P36V47_16K: + case VM_MODE_P47V47_16K: + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + /* kernel mode and page enable mode */ + val = PLV_KERN | CSR_CRMD_PG; + loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1); + + /* time count start from 0 */ + val = 0; + loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val); + + width = vm->page_shift - 3; + + switch (vm->pgtable_levels) { + case 4: + /* pud page shift and width */ + val = (vm->page_shift + width * 2) << 20 | (width << 25); + /* fall through */ + case 3: + /* pmd page shift and width */ + val |= (vm->page_shift + width) << 10 | (width << 15); + /* pte page shift and width */ + val |= vm->page_shift | width << 5; + break; + default: + 
TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels); + } + + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); + + /* PGD page shift and width */ + val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6; + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd); + + /* + * Refill exception runs on real mode + * Entry address should be physical address + */ + val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val); + + /* + * General exception runs on page-enabled mode + * Entry address should be virtual address + */ + val = (unsigned long)handle_exception; + loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val); + val &= ~CSR_TLBIDX_SIZEM; + val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); + + /* LOONGARCH_CSR_KS1 is used for exception stack */ + val = __vm_vaddr_alloc(vm, vm->page_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(val != 0, "No memory for exception stack"); + val = val + vm->page_size; + loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val); + val &= ~CSR_TLBREHI_PS; + val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + size_t stack_size; + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_vcpu *vcpu; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_vaddr != 0, 
"No memory for vm stack"); + + loongarch_vcpu_setup(vcpu); + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, &regs); + regs.gpr[3] = stack_vaddr + stack_size; + vcpu_regs_set(vcpu, &regs); + + return vcpu; +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + /* Setup guest PC register */ + vcpu_regs_get(vcpu, &regs); + regs.pc = (uint64_t)guest_code; + vcpu_regs_set(vcpu, &regs); +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c new file mode 100644 index 000000000000..fc6cbb50573f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + */ +#include "kvm_util.h" + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +vm_vaddr_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + "Unexpected ucall exit mmio address access"); + + return (void *)(*((uint64_t *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c new file mode 100644 index 000000000000..46a14fd63d9e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * 
Copyright (C) 2025, Google LLC. + */ + +#include <time.h> + +#include "lru_gen_util.h" + +/* + * Tracks state while we parse memcg lru_gen stats. The file we're parsing is + * structured like this (some extra whitespace elided): + * + * memcg (id) (path) + * node (id) + * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages) + */ +struct memcg_stats_parse_context { + bool consumed; /* Whether or not this line was consumed */ + /* Next parse handler to invoke */ + void (*next_handler)(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + int current_node_idx; /* Current index in nodes array */ + const char *name; /* The name of the memcg we're looking for */ +}; + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + +struct split_iterator { + char *str; + char *save; +}; + +static char *split_next(struct split_iterator *it) +{ + char *ret = strtok_r(it->str, " \t\n\r", &it->save); + + it->str = NULL; + return ret; +} + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *memcg_id = split_next(&it); + char *memcg_name = split_next(&it); + char *end; + + ctx->consumed = true; + + if (!prefix || strcmp("memcg", prefix)) + return; /* Not a memcg line (maybe empty), skip */ + + TEST_ASSERT(memcg_id && memcg_name, + "malformed memcg line; no memcg id or memcg_name"); + + if (strcmp(memcg_name + 1, ctx->name)) + return; /* Wrong memcg, skip */ + + /* Found it! 
*/ + + stats->memcg_id = strtoul(memcg_id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id); + if (!stats->memcg_id) + return; /* Removed memcg? */ + + ctx->next_handler = memcg_stats_handle_in_memcg; +} + +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *id = split_next(&it); + long found_node_id; + char *end; + + ctx->consumed = true; + ctx->current_node_idx = -1; + + if (!prefix) + return; /* Skip empty lines */ + + if (!strcmp("memcg", prefix)) { + /* Memcg done, found next one; stop. */ + ctx->next_handler = NULL; + return; + } else if (strcmp("node", prefix)) + TEST_ASSERT(false, "found malformed line after 'memcg ...'," + "token: '%s'", prefix); + + /* At this point we know we have a node line. Parse the ID. */ + + TEST_ASSERT(id, "malformed node line; no node id"); + + found_node_id = strtol(id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed node id '%s'", id); + + ctx->current_node_idx = stats->nr_nodes++; + TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES, + "memcg has stats for too many nodes, max is %d", + MAX_NR_NODES); + stats->nodes[ctx->current_node_idx].node = found_node_id; + + ctx->next_handler = memcg_stats_handle_in_node; +} + +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + char *my_line = strdup(line); + struct split_iterator it = { .str = my_line }; + char *gen, *age, *nr_anon, *nr_file; + struct node_stats *node_stats; + struct generation_stats *gen_stats; + char *end; + + TEST_ASSERT(it.str, "failed to copy input line"); + + gen = split_next(&it); + + if (!gen) + goto out_consume; /* Skip empty lines */ + + if (!strcmp("memcg", gen) || !strcmp("node", gen)) { + /* + * Reached next memcg or node section. Don't consume, let the + * other handler deal with this. 
+ */ + ctx->next_handler = memcg_stats_handle_in_memcg; + goto out; + } + + node_stats = &stats->nodes[ctx->current_node_idx]; + TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS, + "found too many generation lines; max is %d", + MAX_NR_GENS); + gen_stats = &node_stats->gens[node_stats->nr_gens++]; + + age = split_next(&it); + nr_anon = split_next(&it); + nr_file = split_next(&it); + + TEST_ASSERT(age && nr_anon && nr_file, + "malformed generation line; not enough tokens"); + + gen_stats->gen = (int)strtol(gen, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen); + + gen_stats->age_ms = strtol(age, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age); + + gen_stats->nr_anon = strtol(nr_anon, &end, 10); + TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'", + nr_anon); + + gen_stats->nr_file = strtol(nr_file, &end, 10); + TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file); + +out_consume: + ctx->consumed = true; +out: + free(my_line); +} + +static void print_memcg_stats(const struct memcg_stats *stats, const char *name) +{ + int node, gen; + + pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id); + for (node = 0; node < stats->nr_nodes; ++node) { + pr_debug("\tnode %d\n", stats->nodes[node].node); + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + const struct generation_stats *gstats = + &stats->nodes[node].gens[gen]; + + pr_debug("\t\tgen %d\tage_ms %ld" + "\tnr_anon %ld\tnr_file %ld\n", + gstats->gen, gstats->age_ms, gstats->nr_anon, + gstats->nr_file); + } + } +} + +/* Re-read lru_gen debugfs information for @memcg into @stats. 
*/ +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg) +{ + FILE *f; + ssize_t read = 0; + char *line = NULL; + size_t bufsz; + struct memcg_stats_parse_context ctx = { + .next_handler = memcg_stats_handle_searching, + .name = memcg, + }; + + memset(stats, 0, sizeof(struct memcg_stats)); + + f = fopen(LRU_GEN_DEBUGFS, "r"); + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + + while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) { + ctx.consumed = false; + + do { + ctx.next_handler(stats, &ctx, line); + if (!ctx.next_handler) + break; + } while (!ctx.consumed); + } + + if (read < 0 && !feof(f)) + TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS); + + TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n" + "Did the memcg get created in the proper mount?", + memcg); + if (line) + free(line); + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); + + print_memcg_stats(stats, memcg); +} + +/* + * Find all pages tracked by lru_gen for this memcg in generation @target_gen. + * + * If @target_gen is negative, look for all generations. + */ +long lru_gen_sum_memcg_stats_for_gen(int target_gen, + const struct memcg_stats *stats) +{ + int node, gen; + long total_nr = 0; + + for (node = 0; node < stats->nr_nodes; ++node) { + const struct node_stats *node_stats = &stats->nodes[node]; + + for (gen = 0; gen < node_stats->nr_gens; ++gen) { + const struct generation_stats *gen_stats = + &node_stats->gens[gen]; + + if (target_gen >= 0 && gen_stats->gen != target_gen) + continue; + + total_nr += gen_stats->nr_anon + gen_stats->nr_file; + } + } + + return total_nr; +} + +/* Find all pages tracked by lru_gen for this memcg. */ +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats) +{ + return lru_gen_sum_memcg_stats_for_gen(-1, stats); +} + +/* + * If lru_gen aging should force page table scanning. + * + * If you want to set this to false, you will need to do eviction + * before doing extra aging passes. 
+ */ +static const bool force_scan = true; + +static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen) +{ + FILE *f = fopen(LRU_GEN_DEBUGFS, "w"); + char *command; + size_t sz; + + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + sz = asprintf(&command, "+ %lu %d %d 1 %d\n", + memcg_id, node_id, max_gen, force_scan); + TEST_ASSERT(sz > 0, "creating aging command failed"); + + pr_debug("Running aging command: %s", command); + if (fwrite(command, sizeof(char), sz, f) < sz) { + TEST_ASSERT(false, "writing aging command %s to %s failed", + command, LRU_GEN_DEBUGFS); + } + + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); +} + +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg) +{ + int node, gen; + + pr_debug("lru_gen: invoking aging...\n"); + + /* Must read memcg stats to construct the proper aging command. */ + lru_gen_read_memcg_stats(stats, memcg); + + for (node = 0; node < stats->nr_nodes; ++node) { + int max_gen = 0; + + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + int this_gen = stats->nodes[node].gens[gen].gen; + + max_gen = max_gen > this_gen ? max_gen : this_gen; + } + + run_aging_impl(stats->memcg_id, stats->nodes[node].node, + max_gen); + } + + /* Re-read so callers get updated information */ + lru_gen_read_memcg_stats(stats, memcg); +} + +/* + * Find which generation contains at least @pages pages, assuming that + * such a generation exists. + */ +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long pages) +{ + int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1; + + for (node = 0; node < stats->nr_nodes; ++node) + for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens; + ++gen_idx) { + gen = stats->nodes[node].gens[gen_idx].gen; + max_gen = gen > max_gen ? gen : max_gen; + min_gen = gen < min_gen ? gen : min_gen; + } + + for (gen = min_gen; gen <= max_gen; ++gen) + /* See if this generation has enough pages. 
*/ + if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages) + return gen; + + return -1; +} + +bool lru_gen_usable(void) +{ + long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK; + int lru_gen_fd, lru_gen_debug_fd; + char mglru_feature_str[8] = {}; + long mglru_features; + + lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY); + if (lru_gen_fd < 0) { + puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH); + return false; + } + if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) { + puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH); + close(lru_gen_fd); + return false; + } + close(lru_gen_fd); + + mglru_features = strtol(mglru_feature_str, NULL, 16); + if ((mglru_features & required_features) != required_features) { + printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n", + mglru_features, required_features); + printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n", + required_features); + return false; + } + + lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR); + __TEST_REQUIRE(lru_gen_debug_fd >= 0, + "lru_gen: Could not open " LRU_GEN_DEBUGFS ", " + "but lru_gen is enabled, so cannot use page_idle."); + close(lru_gen_debug_fd); + return true; +} diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..557c0a0a5658 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S index aa0abd3f35bb..b787b982e922 100644 --- a/tools/testing/selftests/kvm/lib/riscv/handlers.S +++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S @@ 
-10,85 +10,88 @@ #include <asm/csr.h> .macro save_context - addi sp, sp, (-8*34) - sd x1, 0(sp) - sd x2, 8(sp) - sd x3, 16(sp) - sd x4, 24(sp) - sd x5, 32(sp) - sd x6, 40(sp) - sd x7, 48(sp) - sd x8, 56(sp) - sd x9, 64(sp) - sd x10, 72(sp) - sd x11, 80(sp) - sd x12, 88(sp) - sd x13, 96(sp) - sd x14, 104(sp) - sd x15, 112(sp) - sd x16, 120(sp) - sd x17, 128(sp) - sd x18, 136(sp) - sd x19, 144(sp) - sd x20, 152(sp) - sd x21, 160(sp) - sd x22, 168(sp) - sd x23, 176(sp) - sd x24, 184(sp) - sd x25, 192(sp) - sd x26, 200(sp) - sd x27, 208(sp) - sd x28, 216(sp) - sd x29, 224(sp) - sd x30, 232(sp) - sd x31, 240(sp) + addi sp, sp, (-8*36) + sd x1, 8(sp) + sd x2, 16(sp) + sd x3, 24(sp) + sd x4, 32(sp) + sd x5, 40(sp) + sd x6, 48(sp) + sd x7, 56(sp) + sd x8, 64(sp) + sd x9, 72(sp) + sd x10, 80(sp) + sd x11, 88(sp) + sd x12, 96(sp) + sd x13, 104(sp) + sd x14, 112(sp) + sd x15, 120(sp) + sd x16, 128(sp) + sd x17, 136(sp) + sd x18, 144(sp) + sd x19, 152(sp) + sd x20, 160(sp) + sd x21, 168(sp) + sd x22, 176(sp) + sd x23, 184(sp) + sd x24, 192(sp) + sd x25, 200(sp) + sd x26, 208(sp) + sd x27, 216(sp) + sd x28, 224(sp) + sd x29, 232(sp) + sd x30, 240(sp) + sd x31, 248(sp) csrr s0, CSR_SEPC csrr s1, CSR_SSTATUS - csrr s2, CSR_SCAUSE - sd s0, 248(sp) + csrr s2, CSR_STVAL + csrr s3, CSR_SCAUSE + sd s0, 0(sp) sd s1, 256(sp) sd s2, 264(sp) + sd s3, 272(sp) .endm .macro restore_context + ld s3, 272(sp) ld s2, 264(sp) ld s1, 256(sp) - ld s0, 248(sp) - csrw CSR_SCAUSE, s2 + ld s0, 0(sp) + csrw CSR_SCAUSE, s3 csrw CSR_SSTATUS, s1 csrw CSR_SEPC, s0 - ld x31, 240(sp) - ld x30, 232(sp) - ld x29, 224(sp) - ld x28, 216(sp) - ld x27, 208(sp) - ld x26, 200(sp) - ld x25, 192(sp) - ld x24, 184(sp) - ld x23, 176(sp) - ld x22, 168(sp) - ld x21, 160(sp) - ld x20, 152(sp) - ld x19, 144(sp) - ld x18, 136(sp) - ld x17, 128(sp) - ld x16, 120(sp) - ld x15, 112(sp) - ld x14, 104(sp) - ld x13, 96(sp) - ld x12, 88(sp) - ld x11, 80(sp) - ld x10, 72(sp) - ld x9, 64(sp) - ld x8, 56(sp) - ld x7, 48(sp) - ld x6, 
40(sp) - ld x5, 32(sp) - ld x4, 24(sp) - ld x3, 16(sp) - ld x2, 8(sp) - ld x1, 0(sp) - addi sp, sp, (8*34) + ld x31, 248(sp) + ld x30, 240(sp) + ld x29, 232(sp) + ld x28, 224(sp) + ld x27, 216(sp) + ld x26, 208(sp) + ld x25, 200(sp) + ld x24, 192(sp) + ld x23, 184(sp) + ld x22, 176(sp) + ld x21, 168(sp) + ld x20, 160(sp) + ld x19, 152(sp) + ld x18, 144(sp) + ld x17, 136(sp) + ld x16, 128(sp) + ld x15, 120(sp) + ld x14, 112(sp) + ld x13, 104(sp) + ld x12, 96(sp) + ld x11, 88(sp) + ld x10, 80(sp) + ld x9, 72(sp) + ld x8, 64(sp) + ld x7, 56(sp) + ld x6, 48(sp) + ld x5, 40(sp) + ld x4, 32(sp) + ld x3, 24(sp) + ld x2, 16(sp) + ld x1, 8(sp) + addi sp, sp, (8*36) .endm .balign 4 diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 6ae47b3d6b25..2eac7d4b59e9 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -221,39 +221,39 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_riscv_core core; - vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); - 
vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); + core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); + core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); + core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); + core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); + core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); + core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); + core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1)); + core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); + core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); + core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); + core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); + core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); + core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); + core.regs.a3 = 
vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); + core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); + core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); + core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); + core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); + core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); + core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); + core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); + core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); + core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); + core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); + core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); + core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); + core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); + core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); + core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); + core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); + core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); + core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); fprintf(stream, " MODE: 0x%lx\n", core.mode); @@ -402,7 +402,7 @@ struct handlers { exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; }; -void route_exception(struct ex_regs *regs) +void route_exception(struct pt_regs *regs) { struct handlers *handlers = (struct handlers *)exception_handlers; int vector = 0, ec; diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..2c432fa164f1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c new file mode 100644 index 000000000000..d540812d911a --- /dev/null +++ 
b/tools/testing/selftests/kvm/lib/s390/facility.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Contains the definition for the global variables to have the test facility feature. + */ + +#include "facility.h" + +uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 4ad4492eea1d..8ceeb17c819a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -14,7 +14,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); if (vm->pgd_created) @@ -79,7 +79,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) } /* Fill in page table entry */ - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ if (!(entry[idx] & PAGE_INVALID)) fprintf(stderr, "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); @@ -91,7 +91,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) int ri, idx; uint64_t *entry; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); entry = addr_gpa2hva(vm, vm->pgd); @@ -103,7 +103,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); } - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ TEST_ASSERT(!(entry[idx] & PAGE_INVALID), "No page mapping for vm virtual address 0x%lx", gva); @@ -168,7 +168,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct 
kvm_sregs sregs; struct kvm_vcpu *vcpu; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); stack_vaddr = __vm_vaddr_alloc(vm, stack_size, @@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c index cca98734653d..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390/ucall.c diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..a99188f87a38 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. 
*/ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8ed0b74ae837..8a1848586a85 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,13 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. @@ -132,37 +139,57 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); } -bool thp_configured(void) +static bool test_sysfs_path(const char *path) { - int ret; struct stat statbuf; + int ret; - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + ret = stat(path, &statbuf); TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "Error in stating /sys/kernel/mm/transparent_hugepage"); + "Error in stat()ing '%s'", path); return ret == 0; } -size_t get_trans_hugepagesz(void) +bool thp_configured(void) +{ + return test_sysfs_path("/sys/kernel/mm/transparent_hugepage"); +} + +static size_t get_sysfs_val(const char *path) { size_t size; FILE *f; int ret; - TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); - - f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); - TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + f = fopen(path, "r"); + TEST_ASSERT(f, "Error opening '%s'", path); ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret > 0, "Error reading '%s'", path); + + /* Re-scan the input stream to verify the entire file was read. 
*/ ret = fscanf(f, "%ld", &size); - TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); - fclose(f); + TEST_ASSERT(ret < 1, "Error reading '%s'", path); + fclose(f); return size; } +size_t get_trans_hugepagesz(void) +{ + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); +} + +bool is_numa_balancing_enabled(void) +{ + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) + return false; + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; +} + size_t get_def_hugetlb_pagesz(void) { char buf[64]; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 7c9de8414462..5bde176cedd5 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + is_minor ? 
"UFFDIO_CONTINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 89153a333e83..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S index 7629819734af..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86/handlers.S diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c new file mode 100644 index 000000000000..15bc8cd583aa --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hyper-V specific functions. + * + * Copyright (C) 2021, Red Hat Inc. 
+ */ +#include <stdint.h> +#include "processor.h" +#include "hyperv.h" + +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) +{ + static struct kvm_cpuid2 *cpuid_full; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_init_cpuid(vcpu, cpuid_full); +} + +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) +{ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) + return false; + + return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); +} + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva) +{ + vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); + + /* Setup of a region of guest memory for the VP Assist page. 
*/ + hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); + hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + + /* Setup of a region of guest memory for the partition assist page. */ + hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); + hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); + hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); + + *p_hv_pages_gva = hv_pages_gva; + return hv; +} + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index d61e623afc8c..0b1f288ad556 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to memstress.c. + * x86-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. 
*/ @@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); /* * Identity map the first 4G and the test region with 1G pages so that diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c new file mode 100644 index 000000000000..34cb57d1d671 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Tencent, Inc. + */ + +#include <stdint.h> + +#include <linux/kernel.h> + +#include "kvm_util.h" +#include "processor.h" +#include "pmu.h" + +const uint64_t intel_pmu_arch_events[] = { + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + INTEL_ARCH_TOPDOWN_SLOTS, + INTEL_ARCH_TOPDOWN_BE_BOUND, + INTEL_ARCH_TOPDOWN_BAD_SPEC, + INTEL_ARCH_TOPDOWN_FE_BOUND, + INTEL_ARCH_TOPDOWN_RETIRING, + INTEL_ARCH_LBR_INSERTS, +}; +kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); + +const uint64_t amd_pmu_zen_events[] = { + AMD_ZEN_CORE_CYCLES, + AMD_ZEN_INSTRUCTIONS_RETIRED, + AMD_ZEN_BRANCHES_RETIRED, + AMD_ZEN_BRANCHES_MISPREDICTED, +}; +kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); + +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. 
+ * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms up to and including Sierra Forest, both the + * "Instruction Retired" and "Branch Instruction Retired" events are + * overcounted on these instructions, but for Clearwater Forest only the + * "Instruction Retired" event is overcounted on these instructions. + */ +static uint64_t get_pmu_errata(void) +{ + if (!this_cpu_is_intel()) + return 0; + + if (this_cpu_family() != 0x6) + return 0; + + switch (this_cpu_model()) { + case 0xDD: /* Clearwater Forest */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT); + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) | + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT); + default: + return 0; + } +} + +uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void) +{ + pmu_errata_mask = get_pmu_errata(); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 594b061aef52..36104d27f3d9 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1,13 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2018, Google LLC. 
*/ #include "linux/bitmap.h" #include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" #include "sev.h" @@ -19,12 +18,44 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -#define MAX_NR_CPUID_ENTRIES 100 - vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; bool is_forced_emulation_enabled; +uint64_t guest_tsc_khz; + +const char *ex_str(int vector) +{ + switch (vector) { +#define VEC_STR(v) case v##_VECTOR: return "#" #v + case DE_VECTOR: return "no exception"; + case KVM_MAGIC_DE_VECTOR: return "#DE"; + VEC_STR(DB); + VEC_STR(NMI); + VEC_STR(BP); + VEC_STR(OF); + VEC_STR(BR); + VEC_STR(UD); + VEC_STR(NM); + VEC_STR(DF); + VEC_STR(TS); + VEC_STR(NP); + VEC_STR(SS); + VEC_STR(GP); + VEC_STR(PF); + VEC_STR(MF); + VEC_STR(AC); + VEC_STR(MC); + VEC_STR(XM); + VEC_STR(VE); + VEC_STR(CP); + VEC_STR(HV); + VEC_STR(VC); + VEC_STR(SX); + default: return "#??"; +#undef VEC_STR + } +} static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -127,10 +158,10 @@ bool kvm_is_tdp_enabled(void) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); - /* If needed, create page map l4 table. */ + /* If needed, create the top-level page table. 
*/ if (!vm->pgd_created) { vm->pgd = vm_alloc_page_table(vm); vm->pgd_created = true; @@ -187,11 +218,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) { const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + uint64_t *pte = &vm->pgd; + int current_level; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % pg_size) == 0, "Virtual address not aligned,\n" @@ -212,20 +243,17 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; + for (current_level = vm->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_create_upper_pte(vm, pte, vaddr, paddr, + current_level, level); + if (*pte & PTE_LARGE_MASK) + return; + } /* Fill in page table entry. 
*/ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); @@ -258,6 +286,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, for (i = 0; i < nr_pages; i++) { __virt_pg_map(vm, vaddr, paddr, level); + sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, + nr_bytes / PAGE_SIZE); vaddr += pg_size; paddr += pg_size; @@ -279,40 +309,38 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level) { - uint64_t *pml4e, *pdpe, *pde; + int va_width = 12 + (vm->pgtable_levels) * 9; + uint64_t *pte = &vm->pgd; + int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels, "Invalid PG_LEVEL_* '%d'", *level); - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), "Invalid virtual address, vaddr: 0x%lx", vaddr); /* - * Based on the mode check above there are 48 bits in the vaddr, so - * shift 16 to sign extend the last bit (bit-47), + * Check that the vaddr is a sign-extended va_width value. */ - TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), - "Canonical check failed. 
The virtual address is invalid."); - - pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; - - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; - - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; + TEST_ASSERT(vaddr == + (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))), + "Canonical check failed. The virtual address is invalid."); + + for (current_level = vm->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_get_pte(vm, pte, vaddr, current_level); + if (vm_is_target_pte(pte, level, current_level)) + return pte; + } - return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); } uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) @@ -495,7 +523,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; - TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); /* Set mode specific system register values. 
*/ vcpu_sregs_get(vcpu, &sregs); @@ -507,6 +536,10 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; + if (vm->pgtable_levels == 5) + sregs.cr4 |= X86_CR4_LA57; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -520,6 +553,20 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { @@ -544,7 +591,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return false; if (regs->vector == DE_VECTOR) - return false; + regs->vector = KVM_MAGIC_DE_VECTOR; regs->rip = regs->r11; regs->r9 = regs->vector; @@ -565,10 +612,8 @@ void route_exception(struct ex_regs *regs) if (kvm_fixup_exception(regs)) return; - ucall_assert(UCALL_UNHANDLED, - "Unhandled exception in guest", __FILE__, __LINE__, - "Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); + GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); } static void vm_init_descriptor_tables(struct kvm_vm *vm) @@ -610,24 +655,35 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + if (get_ucall(vcpu, &uc) == UCALL_ABORT) REPORT_GUEST_ASSERT(uc); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) { + int r; + + TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ), + "Require 
KVM_GET_TSC_KHZ to provide udelay() to guest."); + vm_create_irqchip(vm); vm_init_descriptor_tables(vm); sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); sync_global_to_guest(vm, is_forced_emulation_enabled); + sync_global_to_guest(vm, pmu_errata_mask); - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { struct kvm_sev_init init = { 0 }; vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } + + r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency."); + guest_tsc_khz = r; + sync_global_to_guest(vm, guest_tsc_khz); } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -668,6 +724,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); @@ -679,6 +736,13 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) mp_state.mp_state = 0; vcpu_mp_state_set(vcpu, &mp_state); + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. 
+ */ + vcpu_get_cpuid(vcpu); return vcpu; } @@ -1127,7 +1191,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; @@ -1184,65 +1248,6 @@ void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - close(kvm_fd); - return cpuid; -} - -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 *cpuid_full; - const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; - int i, nent = 0; - - if (!cpuid_full) { - cpuid_sys = kvm_get_supported_cpuid(); - cpuid_hv = kvm_get_supported_hv_cpuid(); - - cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); - if (!cpuid_full) { - perror("malloc"); - abort(); - } - - /* Need to skip KVM CPUID leaves 0x400000xx */ - for (i = 0; i < cpuid_sys->nent; i++) { - if (cpuid_sys->entries[i].function >= 0x40000000 && - cpuid_sys->entries[i].function < 0x40000100) - continue; - cpuid_full->entries[nent] = cpuid_sys->entries[i]; - nent++; - } - - memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, - cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); - cpuid_full->nent = nent + cpuid_hv->nent; - } - - vcpu_init_cpuid(vcpu, cpuid_full); -} - -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - - vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; 
-} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ @@ -1294,21 +1299,13 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); + + kvm_init_pmu_errata(); } bool sys_clocksource_is_based_on_tsc(void) @@ -1321,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void) return ret; } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index e9535ee20b7f..c3a9838f4806 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -14,7 +14,8 @@ * and find the first range, but that's correct because the condition * expression would cause us to quit the loop. 
*/ -static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region) +static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region, + uint8_t page_type, bool private) { const struct sparsebit *protected_phy_pages = region->protected_phy_pages; const vm_paddr_t gpa_base = region->region.guest_phys_addr; @@ -24,25 +25,35 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio if (!sparsebit_any_set(protected_phy_pages)) return; - sev_register_encrypted_memory(vm, region); + if (!is_sev_snp_vm(vm)) + sev_register_encrypted_memory(vm, region); sparsebit_for_each_set_range(protected_phy_pages, i, j) { const uint64_t size = (j - i + 1) * vm->page_size; const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; - sev_launch_update_data(vm, gpa_base + offset, size); + if (private) + vm_mem_set_private(vm, gpa_base + offset, size); + + if (is_sev_snp_vm(vm)) + snp_launch_update_data(vm, gpa_base + offset, + (uint64_t)addr_gpa2hva(vm, gpa_base + offset), + size, page_type); + else + sev_launch_update_data(vm, gpa_base + offset, size); + } } void sev_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } @@ -50,16 +61,24 @@ void sev_vm_init(struct kvm_vm *vm) void sev_es_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_ES_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } 
+void snp_vm_init(struct kvm_vm *vm) +{ + struct kvm_sev_init init = { 0 }; + + TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) { struct kvm_sev_launch_start launch_start = { @@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) - encrypt_region(vm, region); + encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false); if (policy & SEV_POLICY_ES) vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); @@ -112,6 +131,33 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } +void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy) +{ + struct kvm_sev_snp_launch_start launch_start = { + .policy = policy, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start); +} + +void snp_vm_launch_update(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + int ctr; + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true); + + vm->arch.is_pt_protected = true; +} + +void snp_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_snp_launch_finish launch_finish = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish); +} + struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu) { @@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, return vm; } -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement) { + if (is_sev_snp_vm(vm)) { + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); + + snp_vm_launch_start(vm, policy); + + snp_vm_launch_update(vm); + + snp_vm_launch_finish(vm); + + return; + 
} + sev_vm_launch(vm, policy); if (!measurement) diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 5495a92dfd5a..d239c2097391 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/svm.c * Helpers used for nested SVM testing * Largely inspired from KVM unit test svm.c * diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1265cecc7dd1..1265cecc7dd1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 089b8925b6b2..29b082a58daa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/vmx.c - * * Copyright (C) 2018, Google LLC. 
*/ @@ -200,7 +198,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) if (vmx->eptp_gpa) { uint64_t ept_paddr; struct eptPageTablePointer eptp = { - .memory_type = VMX_BASIC_MEM_TYPE_WB, + .memory_type = X86_MEMTYPE_WB, .page_walk_length = 3, /* + 1 */ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, @@ -403,11 +401,11 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; uint16_t index; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", + "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", nested_paddr); TEST_ASSERT((nested_paddr % page_size) == 0, "Nested physical address not on page boundary,\n" @@ -536,8 +534,7 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm) { TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c deleted file mode 100644 index efb7e7a1354d..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Hyper-V specific functions. - * - * Copyright (C) 2021, Red Hat Inc. 
- */ -#include <stdint.h> -#include "processor.h" -#include "hyperv.h" - -struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva) -{ - vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); - struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); - - /* Setup of a region of guest memory for the VP Assist page. */ - hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); - hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); - hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); - - /* Setup of a region of guest memory for the partition assist page. */ - hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); - hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); - hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); - - /* Setup of a region of guest memory for the enlightened VMCS. */ - hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); - hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); - hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); - - *p_hv_pages_gva = hv_pages_gva; - return hv; -} - -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) -{ - uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); - - current_vp_assist = vp_assist; - - return 0; -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c deleted file mode 100644 index f31f0427c17c..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2023, Tencent, Inc. 
- */ - -#include <stdint.h> - -#include <linux/kernel.h> - -#include "kvm_util.h" -#include "pmu.h" - -const uint64_t intel_pmu_arch_events[] = { - INTEL_ARCH_CPU_CYCLES, - INTEL_ARCH_INSTRUCTIONS_RETIRED, - INTEL_ARCH_REFERENCE_CYCLES, - INTEL_ARCH_LLC_REFERENCES, - INTEL_ARCH_LLC_MISSES, - INTEL_ARCH_BRANCHES_RETIRED, - INTEL_ARCH_BRANCHES_MISPREDICTED, - INTEL_ARCH_TOPDOWN_SLOTS, -}; -kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); - -const uint64_t amd_pmu_zen_events[] = { - AMD_ZEN_CORE_CYCLES, - AMD_ZEN_INSTRUCTIONS_RETIRED, - AMD_ZEN_BRANCHES_RETIRED, - AMD_ZEN_BRANCHES_MISPREDICTED, -}; -kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c new file mode 100644 index 000000000000..355ecac30954 --- /dev/null +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The test validates periodic/one-shot constant timer IRQ using + * CSR.TCFG and CSR.TVAL registers. 
+ */ +#include "arch_timer.h" +#include "kvm_util.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" + +static void do_idle(void) +{ + unsigned int intid; + unsigned long estat; + + __asm__ __volatile__("idle 0" : : : "memory"); + + estat = csr_read(LOONGARCH_CSR_ESTAT); + intid = !!(estat & BIT(INT_TI)); + + /* Make sure pending timer IRQ arrived */ + GUEST_ASSERT_EQ(intid, 1); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid; + uint32_t cpu = guest_get_vcpuid(); + uint64_t xcnt, val, cfg, xcnt_diff_us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + intid = !!(regs->estat & BIT(INT_TI)); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, 1); + + cfg = timer_get_cfg(); + if (cfg & CSR_TCFG_PERIOD) { + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1); + if (shared_data->nr_iter == 0) + disable_timer(); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + + /* + * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1 + * On virtual machine, its value counts down from BIT_ULL(48) - 1 + */ + val = timer_get_val(); + xcnt = timer_get_cycles(); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(val > cfg, + "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx", + val, cfg, xcnt_diff_us); + + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_test_period_timer(uint32_t cpu) +{ + uint32_t irq_iter, config_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = test_args.nr_iter; + shared_data->xcnt = timer_get_cycles(); + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + timer_set_next_cmp_ms(test_args.timer_period_ms, true); + 
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + } + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(irq_iter == 0, + "irq_iter = 0x%x.\n" + " Guest period timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + irq_iter); +} + +static void guest_test_oneshot_timer(uint32_t cpu) +{ + uint32_t irq_iter, config_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = 0; + shared_data->guest_stage = 0; + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_test_emulate_timer(uint32_t cpu) +{ + uint32_t config_iter; + uint64_t xcnt_diff_us, us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + local_irq_disable(); + shared_data->nr_iter = 0; + us = msecs_to_usecs(test_args.timer_period_ms); + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + do_idle(); + + xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt); + __GUEST_ASSERT(xcnt_diff_us >= us, + "xcnt_diff_us = 0x%lx, us = 0x%lx.\n", + xcnt_diff_us, us); + } + 
local_irq_enable(); +} + +static void guest_time_count_test(uint32_t cpu) +{ + uint32_t config_iter; + unsigned long start, end, prev, us; + + /* Assuming that test case starts to run in 1 second */ + start = timer_get_cycles(); + us = msec_to_cycles(1000); + __GUEST_ASSERT(start <= us, + "start = 0x%lx, us = 0x%lx.\n", + start, us); + + us = msec_to_cycles(test_args.timer_period_ms); + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + start = timer_get_cycles(); + end = start + us; + /* test time count growing up always */ + while (start < end) { + prev = start; + start = timer_get_cycles(); + __GUEST_ASSERT(prev <= start, + "prev = 0x%lx, start = 0x%lx.\n", + prev, start); + } + } +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + + /* must run at first */ + guest_time_count_test(cpu); + + timer_irq_enable(); + local_irq_enable(); + guest_test_period_timer(cpu); + guest_test_oneshot_timer(cpu); + guest_test_emulate_timer(cpu); + + GUEST_DONE(); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 05fcf902e067..3cdfa3b19b85 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -22,6 +22,7 @@ #include "processor.h" #include "test_util.h" #include "guest_modes.h" +#include "ucall_common.h" #define DUMMY_MEMSLOT_INDEX 7 @@ -53,12 +54,6 @@ static void vcpu_worker(struct memstress_vcpu_args 
*vcpu_args) } } -struct memslot_antagonist_args { - struct kvm_vm *vm; - useconds_t delay; - uint64_t nr_modifications; -}; - static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, uint64_t nr_modifications) { @@ -85,6 +80,7 @@ struct test_params { useconds_t delay; uint64_t nr_iterations; bool partition_vcpu_memory_access; + bool disable_slot_zap_quirk; }; static void run_test(enum vm_guest_mode mode, void *arg) @@ -95,6 +91,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); +#ifdef __x86_64__ + if (p->disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + + pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ? + "disabled" : "enabled"); +#endif pr_info("Finished creating vCPUs\n"); @@ -113,11 +116,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m mode] [-d delay_usec]\n" + printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n" " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name); guest_modes_help(); printf(" -d: add a delay between each iteration of adding and\n" " deleting a memslot in usec.\n"); + printf(" -q: Disable memslot zap quirk.\n"); printf(" -b: specify the size of the memory region which should be\n" " accessed by each vCPU. e.g. 
10M or 3G.\n" " Default: 1G\n"); @@ -143,7 +147,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) { + while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -166,6 +170,14 @@ int main(int argc, char *argv[]) case 'i': p.nr_iterations = atoi_positive("Number of iterations", optarg); break; +#ifdef __x86_64__ + case 'q': + p.disable_slot_zap_quirk = true; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; +#endif case 'h': default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 579a64f97333..5087d082c4b0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -25,6 +25,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <ucall_common.h> #define MEM_EXTRA_SIZE SZ_64K @@ -113,6 +114,9 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); static sem_t vcpu_ready; static bool map_unmap_verify; +#ifdef __x86_64__ +static bool disable_slot_zap_quirk; +#endif static bool verbose; #define pr_info_v(...) 
\ @@ -414,7 +418,7 @@ static bool _guest_should_exit(void) */ static noinline void host_perform_sync(struct sync_area *sync) { - alarm(2); + alarm(10); atomic_store_explicit(&sync->sync_flag, true, memory_order_release); while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) @@ -578,6 +582,11 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; +#ifdef __x86_64__ + if (disable_slot_zap_quirk) + vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); +#endif + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { @@ -896,6 +905,7 @@ static void help(char *name, struct test_args *targs) pr_info(" -h: print this help screen.\n"); pr_info(" -v: enable verbose mode (not for benchmarking).\n"); pr_info(" -d: enable extra debug checks.\n"); + pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", targs->nslots); pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", @@ -954,7 +964,7 @@ static bool parse_args(int argc, char *argv[], uint32_t max_mem_slots; int opt; - while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { + while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { switch (opt) { case 'h': default: @@ -966,6 +976,13 @@ static bool parse_args(int argc, char *argv[], case 'd': map_unmap_verify = true; break; +#ifdef __x86_64__ + case 'q': + disable_slot_zap_quirk = true; + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; +#endif case 's': targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 1 && targs->nslots != -1) { diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 0b9678858b6d..51c070556f3e 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ 
b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -15,16 +15,63 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +static bool mprotect_ro_done; +static bool all_vcpus_hit_ro_fault; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; + int i; - for (;;) { + for (i = 0; i < 2; i++) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; - GUEST_SYNC(0); + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(i); } + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + *((volatile uint64_t *)gpa); + GUEST_SYNC(2); + + /* + * Write to the region while mprotect(PROT_READ) is underway. Keep + * looping until the memory is guaranteed to be read-only and a fault + * has occurred, otherwise vCPUs may complete their writes and advance + * to the next stage prematurely. + * + * For architectures that support skipping the faulting instruction, + * generate the store via inline assembly to ensure the exact length + * of the instruction is known and stable (vcpu_arch_put_guest() on + * fixed-length architectures should work, but the cost of paranoia + * is low in this case). For x86, hand-code the exact opcode so that + * there is no room for variability in the generated instruction. + */ + do { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) +#ifdef __x86_64__ + asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */ +#elif defined(__aarch64__) + asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); +#else + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); +#endif + } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); + + /* + * Only architectures that write the entire range can explicitly sync, + * as other architectures will be stuck on the write fault. 
+ */ +#if defined(__x86_64__) || defined(__aarch64__) + GUEST_SYNC(3); +#endif + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(4); + + GUEST_ASSERT(0); } struct vcpu_info { @@ -35,6 +82,7 @@ struct vcpu_info { static int nr_vcpus; static atomic_t rendezvous; +static atomic_t nr_ro_faults; static void rendezvous_with_boss(void) { @@ -51,34 +99,104 @@ static void rendezvous_with_boss(void) } } -static void run_vcpu(struct kvm_vcpu *vcpu) +static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], stage); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { vcpu_run(vcpu); - TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + assert_sync_stage(vcpu, stage); } static void *vcpu_worker(void *data) { + struct kvm_sregs __maybe_unused sregs; struct vcpu_info *info = data; struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; + int r; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 0, write all of guest memory. */ + run_vcpu(vcpu, 0); rendezvous_with_boss(); - vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ + vcpu_sregs_get(vcpu, &sregs); /* Toggle CR0.WP to trigger a MMU context reset. */ sregs.cr0 ^= X86_CR0_WP; -#endif vcpu_sregs_set(vcpu, &sregs); +#endif rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 1, re-write all of guest memory. */ + run_vcpu(vcpu, 1); + rendezvous_with_boss(); + + /* Stage 2, read all of guest memory, which is now read-only. */ + run_vcpu(vcpu, 2); + + /* + * Stage 3, write guest memory and verify KVM returns -EFAULT for once + * the mprotect(PROT_READ) lands. Only architectures that support + * validating *all* of guest memory sync for this stage, as vCPUs will + * be stuck on the faulting instruction for other architectures. 
Go to + * stage 3 without a rendezvous + */ + r = _vcpu_run(vcpu); + TEST_ASSERT(r == -1 && errno == EFAULT, + "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + + atomic_inc(&nr_ro_faults); + if (atomic_read(&nr_ro_faults) == nr_vcpus) { + WRITE_ONCE(all_vcpus_hit_ro_fault, true); + sync_global_to_guest(vm, all_vcpus_hit_ro_fault); + } + +#if defined(__x86_64__) || defined(__aarch64__) + /* + * Verify *all* writes from the guest hit EFAULT due to the VMA now + * being read-only. x86 and arm64 only at this time as skipping the + * instruction that hits the EFAULT requires advancing the program + * counter, which is arch specific and relies on inline assembly. + */ +#ifdef __x86_64__ + vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS; +#endif + for (;;) { + r = _vcpu_run(vcpu); + if (!r) + break; + TEST_ASSERT_EQ(errno, EFAULT); +#if defined(__x86_64__) + WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS); + vcpu->run->s.regs.regs.rip += 3; +#elif defined(__aarch64__) + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4); +#endif + + } + assert_sync_stage(vcpu, 3); +#endif /* __x86_64__ || __aarch64__ */ + rendezvous_with_boss(); + + /* + * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to + * make the memory writable again. 
+ */ + do { + r = _vcpu_run(vcpu); + } while (r && errno == EFAULT); + TEST_ASSERT_EQ(r, 0); + assert_sync_stage(vcpu, 4); rendezvous_with_boss(); return NULL; @@ -145,8 +263,10 @@ static void calc_default_nr_vcpus(void) TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); - nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; + nr_vcpus = CPU_COUNT(&possible_mask); TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); + if (nr_vcpus >= 2) + nr_vcpus = nr_vcpus * 3/4; } int main(int argc, char *argv[]) @@ -161,7 +281,7 @@ int main(int argc, char *argv[]) const uint64_t start_gpa = SZ_4G; const int first_slot = 1; - struct timespec time_start, time_run1, time_reset, time_run2; + struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; @@ -209,14 +329,19 @@ int main(int argc, char *argv[]) vcpus = malloc(nr_vcpus * sizeof(*vcpus)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, +#ifdef __x86_64__ + max_mem / SZ_1G, +#else + max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size, +#endif + guest_code, vcpus); max_gpa = vm->max_gfn << vm->page_shift; TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); @@ -237,11 +362,9 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. 
*/ - for (i = 0; i < slot_size; i += SZ_1G) - __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); + virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G); #else - for (i = 0; i < slot_size; i += vm->page_size) - virt_pg_map(vm, gpa + i, gpa + i); + virt_map(vm, gpa, gpa, slot_size >> vm->page_shift); #endif } @@ -259,14 +382,27 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_reset, "reset"); rendezvous_with_vcpus(&time_run2, "run 2"); + mprotect(mem, slot_size, PROT_READ); + mprotect_ro_done = true; + sync_global_to_guest(vm, mprotect_ro_done); + + rendezvous_with_vcpus(&time_ro, "mprotect RO"); + mprotect(mem, slot_size, PROT_READ | PROT_WRITE); + rendezvous_with_vcpus(&time_rw, "mprotect RW"); + + time_rw = timespec_sub(time_rw, time_ro); + time_ro = timespec_sub(time_ro, time_run2); time_run2 = timespec_sub(time_run2, time_reset); - time_reset = timespec_sub(time_reset, time_run1); + time_reset = timespec_sub(time_reset, time_run1); time_run1 = timespec_sub(time_run1, time_start); - pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, " + "ro = %ld.%.9lds, rw = %ld.%.9lds\n", time_run1.tv_sec, time_run1.tv_nsec, time_reset.tv_sec, time_reset.tv_nsec, - time_run2.tv_sec, time_run2.tv_nsec); + time_run2.tv_sec, time_run2.tv_nsec, + time_ro.tv_sec, time_ro.tv_nsec, + time_rw.tv_sec, time_rw.tv_nsec); /* * Delete even numbered slots (arbitrary) and unmap the first half of @@ -276,7 +412,7 @@ int main(int argc, char *argv[]) for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - munmap(mem, slot_size / 2); + kvm_munmap(mem, slot_size / 2); /* Sanity check that the vCPUs actually ran. 
*/ for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c new file mode 100644 index 000000000000..93e603d91311 --- /dev/null +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Intel, Inc + * + * Author: + * Isaku Yamahata <isaku.yamahata at gmail.com> + */ +#include <linux/sizes.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> +#include <pthread.h> + +/* Arbitrarily chosen values */ +#define TEST_SIZE (SZ_2M + PAGE_SIZE) +#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) +#define TEST_SLOT 10 + +static void guest_code(uint64_t base_gva) +{ + volatile uint64_t val __used; + int i; + + for (i = 0; i < TEST_NPAGES; i++) { + uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE); + + val = *src; + } + + GUEST_DONE(); +} + +struct slot_worker_data { + struct kvm_vm *vm; + u64 gpa; + uint32_t flags; + bool worker_ready; + bool prefault_ready; + bool recreate_slot; +}; + +static void *delete_slot_worker(void *__data) +{ + struct slot_worker_data *data = __data; + struct kvm_vm *vm = data->vm; + + WRITE_ONCE(data->worker_ready, true); + + while (!READ_ONCE(data->prefault_ready)) + cpu_relax(); + + vm_mem_region_delete(vm, TEST_SLOT); + + while (!READ_ONCE(data->recreate_slot)) + cpu_relax(); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, + TEST_SLOT, TEST_NPAGES, data->flags); + + return NULL; +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, + u64 size, u64 expected_left, bool private) +{ + struct kvm_pre_fault_memory range = { + .gpa = base_gpa + offset, + .size = size, + .flags = 0, + }; + struct slot_worker_data data = { + .vm = vcpu->vm, + .gpa = base_gpa, + .flags = private ? 
KVM_MEM_GUEST_MEMFD : 0, + }; + bool slot_recreated = false; + pthread_t slot_worker; + int ret, save_errno; + u64 prev; + + /* + * Concurrently delete (and recreate) the slot to test KVM's handling + * of a racing memslot deletion with prefaulting. + */ + pthread_create(&slot_worker, NULL, delete_slot_worker, &data); + + while (!READ_ONCE(data.worker_ready)) + cpu_relax(); + + WRITE_ONCE(data.prefault_ready, true); + + for (;;) { + prev = range.size; + ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); + save_errno = errno; + TEST_ASSERT((range.size < prev) ^ (ret < 0), + "%sexpecting range.size to change on %s", + ret < 0 ? "not " : "", + ret < 0 ? "failure" : "success"); + + /* + * Immediately retry prefaulting if KVM was interrupted by an + * unrelated signal/event. + */ + if (ret < 0 && save_errno == EINTR) + continue; + + /* + * Tell the worker to recreate the slot in order to complete + * prefaulting (if prefault didn't already succeed before the + * slot was deleted) and/or to prepare for the next testcase. + * Wait for the worker to exit so that the next invocation of + * prefaulting is guaranteed to complete (assuming no KVM bugs). + */ + if (!slot_recreated) { + WRITE_ONCE(data.recreate_slot, true); + pthread_join(slot_worker, NULL); + slot_recreated = true; + + /* + * Retry prefaulting to get a stable result, i.e. to + * avoid seeing random EAGAIN failures. Don't retry if + * prefaulting already succeeded, as KVM disallows + * prefaulting with size=0, i.e. blindly retrying would + * result in test failures due to EINVAL. KVM should + * always return success if all bytes are prefaulted, + * i.e. there is no need to guard against EAGAIN being + * returned. 
+ */ + if (range.size) + continue; + } + + /* + * All done if there are no remaining bytes to prefault, or if + * prefaulting failed (EINTR was handled above, and EAGAIN due + * to prefaulting a memslot that's being actively deleted should + * be impossible since the memslot has already been recreated). + */ + if (!range.size || ret < 0) + break; + } + + TEST_ASSERT(range.size == expected_left, + "Completed with %llu bytes left, expected %lu", + range.size, expected_left); + + /* + * Assert success if prefaulting the entire range should succeed, i.e. + * complete with no bytes remaining. Otherwise prefaulting should have + * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when + * no memslot exists). + */ + if (!expected_left) + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); + else + TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); +} + +static void __test_pre_fault_memory(unsigned long vm_type, bool private) +{ + uint64_t gpa, gva, alignment, guest_page_size; + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = vm_type, + }; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); + + alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size; +#ifdef __s390x__ + alignment = max(0x100000UL, guest_page_size); +#else + alignment = SZ_2M; +#endif + gpa = align_down(gpa, alignment); + gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT, + TEST_NPAGES, private ? 
KVM_MEM_GUEST_MEMFD : 0); + virt_map(vm, gva, gpa, TEST_NPAGES); + + if (private) + vm_mem_set_private(vm, gpa, TEST_SIZE); + + pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); + + vcpu_args_set(vcpu, 1, gva); + vcpu_run(vcpu); + + run = vcpu->run; + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + break; + } + + kvm_vm_free(vm); +} + +static void test_pre_fault_memory(unsigned long vm_type, bool private) +{ + if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) { + pr_info("Skipping tests for vm_type 0x%lx\n", vm_type); + return; + } + + __test_pre_fault_memory(vm_type, private); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY)); + + test_pre_fault_memory(0, false); +#ifdef __x86_64__ + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false); + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true); +#endif + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 2c792228ac0b..f962fefc48fa 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -15,7 +15,7 @@ static int timer_irq = IRQ_S_TIMER; -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { uint64_t xcnt, xcnt_diff_us, cmp; unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; @@ -93,7 +93,7 @@ struct kvm_vm *test_vm_create(void) vcpu_init_vector_tables(vcpus[i]); /* Initialize guest timer frequency. 
*/ - vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency)); sync_global_to_guest(vm, timer_freq); pr_debug("timer_freq: %lu\n", timer_freq); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 0e0712854953..739d17befb5a 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -27,7 +27,7 @@ static void guest_code(void) GUEST_DONE(); } -static void guest_breakpoint_handler(struct ex_regs *regs) +static void guest_breakpoint_handler(struct pt_regs *regs) { WRITE_ONCE(sw_bp_addr, regs->epc); regs->epc += 4; @@ -60,7 +60,7 @@ int main(void) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); /* skip sw_bp_1 */ diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 222198dd6d04..cb54a56990a0 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -17,6 +17,15 @@ enum { VCPU_FEATURE_SBI_EXT, }; +enum { + KVM_RISC_V_REG_OFFSET_VSTART = 0, + KVM_RISC_V_REG_OFFSET_VL, + KVM_RISC_V_REG_OFFSET_VTYPE, + KVM_RISC_V_REG_OFFSET_VCSR, + KVM_RISC_V_REG_OFFSET_VLENB, + KVM_RISC_V_REG_OFFSET_MAX, +}; + static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; bool filter_reg(__u64 reg) @@ -41,14 +50,23 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMNPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | 
KVM_RISCV_ISA_EXT_SMSTATEEN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSNPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADU: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: @@ -56,11 +74,19 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB: + case KVM_REG_RISCV_ISA_EXT | 
KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: @@ -68,6 +94,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH: @@ -78,6 +105,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | 
KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: @@ -101,7 +130,10 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: return true; @@ -126,6 +158,38 @@ bool check_reject_set(int err) return err == EINVAL; } +static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, + uint64_t feature) +{ + unsigned long vlenb_reg = 0; + int rc; + u64 reg, size; + + /* Enable V extension so that we can get the vlenb register */ + rc = __vcpu_set_reg(vcpu, feature, 1); + if (rc) + return rc; + + vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]); + if (!vlenb_reg) { + TEST_FAIL("Can't compute vector register size from zero vlenb\n"); + return -EPERM; + } + + size = __builtin_ctzl(vlenb_reg); + size <<= KVM_REG_SIZE_SHIFT; + + for (int i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i); + 
s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg; + } + + /* We should assert if disabling failed here while enabling succeeded before */ + vcpu_set_reg(vcpu, feature, 0); + + return 0; +} + void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; @@ -155,6 +219,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) if (!s->feature) continue; + if (s->feature == KVM_RISCV_ISA_EXT_V) { + feature = RISCV_ISA_EXT_REG(s->feature); + rc = override_vector_reg_size(vcpu, s, feature); + if (rc) + goto skip; + } + switch (s->feature_type) { case VCPU_FEATURE_ISA_EXT: feature = RISCV_ISA_EXT_REG(s->feature); @@ -169,6 +240,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) /* Try to enable the desired extension */ __vcpu_set_reg(vcpu, feature, 1); +skip: /* Double check whether the desired extension was enabled */ __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature), "%s not available, skipping tests", s->name); @@ -189,6 +261,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)"; case KVM_REG_RISCV_CONFIG_REG(mvendorid): return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; case KVM_REG_RISCV_CONFIG_REG(marchid): @@ -393,6 +467,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) return strdup_printf("%lld /* UNKNOWN */", reg_off); } +static const char *vector_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_v_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR); + int reg_index = 0; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR); + + if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0)) + reg_index = reg_off - 
KVM_REG_RISCV_VECTOR_REG(0); + switch (reg_off) { + case KVM_REG_RISCV_VECTOR_REG(0) ... + KVM_REG_RISCV_VECTOR_REG(31): + return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index); + case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vl): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vtype): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + #define KVM_ISA_EXT_ARR(ext) \ [KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext @@ -407,14 +510,23 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(I), KVM_ISA_EXT_ARR(M), KVM_ISA_EXT_ARR(V), + KVM_ISA_EXT_ARR(SMNPM), KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), + KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZBC), @@ -422,11 +534,19 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZCA), + KVM_ISA_EXT_ARR(ZCB), + KVM_ISA_EXT_ARR(ZCD), + KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), 
KVM_ISA_EXT_ARR(ZICSR), @@ -434,6 +554,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZIMOP), KVM_ISA_EXT_ARR(ZKND), KVM_ISA_EXT_ARR(ZKNE), KVM_ISA_EXT_ARR(ZKNH), @@ -444,6 +565,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -513,10 +636,13 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), - KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), }; if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) @@ -571,6 +697,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off) return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off); } +static const char *sbi_fwft_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)"; + case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)"; + case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)"; + case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)"; + case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)"; + case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)"; + } + return 
strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off); +} + static const char *sbi_id_to_str(const char *prefix, __u64 id) { __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE); @@ -583,6 +722,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id) switch (reg_subtype) { case KVM_REG_RISCV_SBI_STA: return sbi_sta_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_FWFT: + return sbi_fwft_id_to_str(reg_off); } return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); @@ -605,6 +746,9 @@ void print_reg(const char *prefix, __u64 id) case KVM_REG_SIZE_U128: reg_size = "KVM_REG_SIZE_U128"; break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; default: printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); @@ -636,6 +780,10 @@ void print_reg(const char *prefix, __u64 id) printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", reg_size, fp_d_id_to_str(prefix, id)); break; + case KVM_REG_RISCV_VECTOR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n", + reg_size, vector_id_to_str(prefix, id)); + break; case KVM_REG_RISCV_ISA_EXT: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", reg_size, isa_ext_id_to_str(prefix, id)); @@ -661,10 +809,13 @@ void print_reg(const char *prefix, __u64 id) */ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | 
KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), @@ -740,11 +891,26 @@ static __u64 sbi_sta_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi), }; +static __u64 sbi_fwft_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value), +}; + static __u64 zicbom_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM, }; +static __u64 zicbop_regs[] = { + 
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP, +}; + static __u64 zicboz_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ, @@ -840,6 +1006,48 @@ static __u64 fp_d_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D, }; +/* Define a default vector registers with length. This will be overwritten at runtime */ +static __u64 vector_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7), + KVM_REG_RISCV | 
KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29), + KVM_REG_RISCV | 
KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V, +}; + #define SUBLIST_BASE \ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} @@ -849,8 +1057,13 @@ static __u64 fp_d_regs[] = { #define SUBLIST_SBI_STA \ {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \ .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),} +#define SUBLIST_SBI_FWFT \ + {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \ + .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),} #define SUBLIST_ZICBOM \ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define SUBLIST_ZICBOP \ + {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),} #define SUBLIST_ZICBOZ \ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} #define SUBLIST_AIA \ @@ -864,6 +1077,9 @@ static __u64 fp_d_regs[] = { {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ .regs_n = ARRAY_SIZE(fp_d_regs),} +#define SUBLIST_V \ + {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),} + #define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \ static __u64 regs_##ext[] = { \ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ @@ -927,18 +1143,31 @@ KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE); KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); +KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); +KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY); +KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); 
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); +KVM_ISA_EXT_SUBLIST_CONFIG(v, V); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); +KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); +KVM_ISA_EXT_SIMPLE_CONFIG(ssnpm, SSNPM); KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); +KVM_ISA_EXT_SIMPLE_CONFIG(svade, SVADE); +KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); +KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO); +KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC); +KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); @@ -946,11 +1175,19 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); +KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); +KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); +KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); @@ -958,6 +1195,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, 
ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH); @@ -968,6 +1206,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO); KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA); KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); @@ -984,17 +1224,30 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_sta, &config_sbi_pmu, &config_sbi_dbcn, + &config_sbi_susp, + &config_sbi_mpxy, + &config_sbi_fwft, &config_aia, &config_fp_f, &config_fp_d, &config_h, + &config_v, + &config_smnpm, &config_smstateen, &config_sscofpmf, + &config_ssnpm, &config_sstc, + &config_svade, + &config_svadu, &config_svinval, &config_svnapot, &config_svpbmt, + &config_svvptc, + &config_zaamo, + &config_zabha, &config_zacas, + &config_zalrsc, + &config_zawrs, &config_zba, &config_zbb, &config_zbc, @@ -1002,11 +1255,19 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zbkc, &config_zbkx, &config_zbs, + &config_zca, + &config_zcb, + &config_zcd, + &config_zcf, + &config_zcmop, &config_zfa, + &config_zfbfmin, &config_zfh, &config_zfhmin, &config_zicbom, + &config_zicbop, &config_zicboz, + &config_ziccrse, &config_zicntr, &config_zicond, &config_zicsr, @@ -1014,6 +1275,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zihintntl, &config_zihintpause, &config_zihpm, + &config_zimop, &config_zknd, &config_zkne, &config_zknh, @@ -1024,6 +1286,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_ztso, &config_zvbb, &config_zvbc, + &config_zvfbfmin, + &config_zvfbfwma, &config_zvfh, &config_zvfhmin, &config_zvkb, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index f299cbfd23ca..924a335d2262 100644 --- 
a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -39,7 +39,13 @@ static bool illegal_handler_invoked; #define SBI_PMU_TEST_SNAPSHOT BIT(2) #define SBI_PMU_TEST_OVERFLOW BIT(3) -static int disabled_tests; +#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5 +struct test_args { + int disabled_tests; + int overflow_irqnum; +}; + +static struct test_args targs; unsigned long pmu_csr_read_num(int csr_num) { @@ -67,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num) switch (csr_num) { switchcase_csr_read_32(CSR_CYCLE, ret) - switchcase_csr_read_32(CSR_CYCLEH, ret) default : break; } @@ -118,26 +123,44 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags) ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags, 0, 0, 0); - __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n", - counter, ret.error); + __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld error %ld\n", counter, ret.error); } -static void guest_illegal_exception_handler(struct ex_regs *regs) +static void guest_illegal_exception_handler(struct pt_regs *regs) { + unsigned long insn; + int opcode, csr_num, funct3; + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, "Unexpected exception handler %lx\n", regs->cause); + insn = regs->badaddr; + opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT; + __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM, + "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn); + + csr_num = GET_CSR_NUM(insn); + funct3 = GET_RM(insn); + /* Validate if it is a CSR read/write operation */ + __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4), + "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n", + funct3, csr_num); + + /* Validate if it is a HPMCOUNTER CSR operation */ + __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31), + "Unexpected csr_num 0x%x\n", csr_num); + 
illegal_handler_invoked = true; /* skip the trapping instruction */ regs->epc += 4; } -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; unsigned long overflown_mask; - unsigned long counter_val = 0; /* Validate that we are in the correct irq handler */ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF); @@ -151,10 +174,6 @@ static void guest_irq_handler(struct ex_regs *regs) GUEST_ASSERT(overflown_mask & 0x01); WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1); - - counter_val = READ_ONCE(snapshot_data->ctr_values[0]); - /* Now start the counter to mimick the real driver behavior */ - start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val); } static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask, @@ -479,7 +498,7 @@ static void test_pmu_events_snaphost(void) static void test_pmu_events_overflow(void) { - int num_counters = 0; + int num_counters = 0, i = 0; /* Verify presence of SBI PMU and minimum requrired SBI version */ verify_sbi_requirement_assert(); @@ -496,11 +515,15 @@ static void test_pmu_events_overflow(void) * Qemu supports overflow for cycle/instruction. * This test may fail on any platform that do not support overflow for these two events. 
*/ - test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); - GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1); + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); - test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); - GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2); + vcpu_shared_irq_count = 0; + + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); GUEST_DONE(); } @@ -608,8 +631,12 @@ static void test_vm_events_overflow(void *guest_code) vcpu_init_vector_tables(vcpu); /* Initialize guest timer frequency. */ - vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq); + timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); + + /* Export the shared variables to the guest */ sync_global_to_guest(vm, timer_freq); + sync_global_to_guest(vm, vcpu_shared_irq_count); + sync_global_to_guest(vm, targs); run_vcpu(vcpu); @@ -618,28 +645,38 @@ static void test_vm_events_overflow(void *guest_code) static void test_print_help(char *name) { - pr_info("Usage: %s [-h] [-d <test name>]\n", name); - pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n", + name); + pr_info("\t-t: Test to run (default all). 
Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n", + SBI_PMU_OVERFLOW_IRQNUM_DEFAULT); pr_info("\t-h: print this help screen\n"); } static bool parse_args(int argc, char *argv[]) { int opt; + int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT | + SBI_PMU_TEST_OVERFLOW; + int overflow_interrupts = 0; - while ((opt = getopt(argc, argv, "hd:")) != -1) { + while ((opt = getopt(argc, argv, "ht:n:")) != -1) { switch (opt) { - case 'd': + case 't': if (!strncmp("basic", optarg, 5)) - disabled_tests |= SBI_PMU_TEST_BASIC; + temp_disabled_tests &= ~SBI_PMU_TEST_BASIC; else if (!strncmp("events", optarg, 6)) - disabled_tests |= SBI_PMU_TEST_EVENTS; + temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS; else if (!strncmp("snapshot", optarg, 8)) - disabled_tests |= SBI_PMU_TEST_SNAPSHOT; + temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT; else if (!strncmp("overflow", optarg, 8)) - disabled_tests |= SBI_PMU_TEST_OVERFLOW; + temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW; else goto done; + targs.disabled_tests = temp_disabled_tests; + break; + case 'n': + overflow_interrupts = atoi_positive("Number of LCOFI", optarg); break; case 'h': default: @@ -647,6 +684,15 @@ static bool parse_args(int argc, char *argv[]) } } + if (overflow_interrupts > 0) { + if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) { + pr_info("-n option is only available for overflow test\n"); + goto done; + } else { + targs.overflow_irqnum = overflow_interrupts; + } + } + return true; done: test_print_help(argv[0]); @@ -655,25 +701,28 @@ done: int main(int argc, char *argv[]) { + targs.disabled_tests = 0; + targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT; + if (!parse_args(argc, argv)) exit(KSFT_SKIP); - if (!(disabled_tests & SBI_PMU_TEST_BASIC)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) { test_vm_basic_test(test_pmu_basic_sanity); pr_info("SBI 
PMU basic test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) { test_vm_events_test(test_pmu_events); pr_info("SBI PMU event verification test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { test_vm_events_snapshot_test(test_pmu_events_snaphost); pr_info("SBI PMU event verification with snapshot test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) { test_vm_events_overflow(test_pmu_events_overflow); pr_info("SBI PMU event verification with overflow test : PASS\n"); } diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab..1375fca80bcd 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,25 +196,27 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; + /* -u takes no argument: must not fall through into -l, where optarg would be NULL */ + break; case 'l': latency = atoi_paranoid(optarg); break; case 'h': default: @@ -243,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. 
it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + /* The whole value must be written; a short write is a failure too. */ + r = write(fd, &latency, sizeof(latency)); + TEST_ASSERT(r == sizeof(latency), "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +294,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + /* fd is -1 when -l was not given; 0 is a valid descriptor. */ + if (fd >= 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +312,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. 
via cpuidle.off=1 or via -l <latency>, or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index b39033844756..e39a724fe860 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -17,16 +17,17 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" #define MAIN_PAGE_COUNT 512 #define TEST_DATA_PAGE_COUNT 512 #define TEST_DATA_MEMSLOT 1 -#define TEST_DATA_START_GFN 4096 +#define TEST_DATA_START_GFN PAGE_SIZE #define TEST_DATA_TWO_PAGE_COUNT 256 #define TEST_DATA_TWO_MEMSLOT 2 -#define TEST_DATA_TWO_START_GFN 8192 +#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; @@ -66,7 +67,7 @@ static void guest_dirty_test_data(void) " lghi 5,%[page_count]\n" /* r5 += r1 */ "2: agfr 5,1\n" - /* r2 = r1 << 12 */ + /* r2 = r1 << PAGE_SHIFT */ "1: sllg 2,1,12(0)\n" /* essa(r4, r2, SET_STABLE) */ " .insn rrf,0xb9ab0000,4,2,1,0\n" @@ -144,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm) slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, 0); } static struct kvm_vm *create_vm_two_memslots(void) @@ -443,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) ); } -static void test_get_inital_dirty(void) +static void test_get_initial_dirty(void) { struct kvm_vm *vm = create_vm_two_memslots(); struct kvm_vcpu *vcpu; @@ -650,7 +651,7 @@ struct testdef { } testlist[] = { { "migration mode and dirty tracking", test_migration_mode }, { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, - { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty }, { 
"GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, }; diff --git a/tools/testing/selftests/kvm/s390/config b/tools/testing/selftests/kvm/s390/config new file mode 100644 index 000000000000..23270f2d679f --- /dev/null +++ b/tools/testing/selftests/kvm/s390/config @@ -0,0 +1,2 @@ +CONFIG_KVM=y +CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c new file mode 100644 index 000000000000..aded795d42be --- /dev/null +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those + * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction + * query data reported via the CPU Model, allowing us to directly compare it with the data + * acquired through executing the queries in the test. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include "facility.h" + +#include "kvm_util.h" + +#define PLO_FUNCTION_MAX 256 + +/* Query available CPU subfunctions */ +struct kvm_s390_vm_cpu_subfunc cpu_subfunc; + +static void get_cpu_machine_subfuntions(struct kvm_vm *vm, + struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) +{ + int r; + + r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); + + TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); +} + +static inline int plo_test_bit(unsigned char nr) +{ + unsigned long function = nr | 0x100; + int cc; + + asm volatile(" lgr 0,%[function]\n" + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : [function] "d" (function) + : "cc", "0"); + return cc == 0; +} + +/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ +static void test_plo_asm_block(u8 (*query)[32]) +{ + for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { + if (plo_test_bit(i)) + (*query)[i >> 3] |= 0x80 >> (i & 7); + } +} + +/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ +static void test_kmac_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb91e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ +static void test_kmc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92f0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ +static void test_km_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92e0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + 
+/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ +static void test_kimd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ +static void test_klmd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93f0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ +static void test_kmctr_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb92d0000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ +static void test_kmf_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92a0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ +static void test_kmo_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92b0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ +static void test_pcc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92c0000,0,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ +static void test_prno_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93c0000,2,4\n" + : [query] "=R" 
(*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ +static void test_kma_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb9290000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ +static void test_kdsa_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93a0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ +static void test_sortl_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ +static void test_dfltcc_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* + * Testing Perform Function with Concurrent Results (PFCR) + * CPU subfunctions's ASM block + */ +static void test_pfcr_asm_block(u8 (*query)[16]) +{ + asm volatile(" lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]\n" + : [query] "=QS" (*query) + : + : "cc", "0"); +} + +typedef void (*testfunc_t)(u8 (*array)[]); + +struct testdef { + const char *subfunc_name; + u8 *subfunc_array; + size_t array_size; + testfunc_t test; + int facility_bit; +} testlist[] = { + /* + * PLO was introduced in the very first 64-bit machine generation. + * Hence it is assumed PLO is always installed in Z Arch. 
+ */ + { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, + /* MSA - Facility bit 17 */ + { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, + { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, + { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, + { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, + { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, + /* MSA - Facility bit 77 */ + { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, + { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, + { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, + { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, + /* MSA5 - Facility bit 57 */ + { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, + /* MSA8 - Facility bit 146 */ + { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, + /* MSA9 - Facility bit 155 */ + { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, + /* SORTL - Facility bit 150 */ + { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, + /* DFLTCC - Facility bit 151 */ + { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, + /* Concurrent-function facility - Facility bit 201 */ + { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int idx; + + ksft_print_header(); + + vm = vm_create(1); + + memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); + get_cpu_machine_subfuntions(vm, &cpu_subfunc); + + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (test_facility(testlist[idx].facility_bit)) { + u8 *array = 
calloc(1, testlist[idx].array_size); + + /* + * The buffer must start zeroed: the PLO query only ORs bits + * into it, so stale heap contents would corrupt the compare. + */ + TEST_ASSERT(array, "Failed to allocate query buffer for %s", + testlist[idx].subfunc_name); + + testlist[idx].test((u8 (*)[testlist[idx].array_size])array); + + TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, + array, testlist[idx].array_size), 0); + + ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); + free(array); + } else { + ksft_test_result_skip("%s feature is not available\n", + testlist[idx].subfunc_name); + } + } + + kvm_vm_free(vm); + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c index 84313fb27529..ad8095968601 100644 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ b/tools/testing/selftests/kvm/s390/debug_test.c @@ -2,12 +2,12 @@ /* Test KVM debugging features. */ #include "kvm_util.h" #include "test_util.h" +#include "sie.h" #include <linux/kvm.h> #define __LC_SVC_NEW_PSW 0x1c0 #define __LC_PGM_NEW_PSW 0x1d0 -#define ICPT_INSTRUCTION 0x04 #define IPA0_DIAG 0x8300 #define PGM_SPECIFICATION 0x06 @@ -85,7 +85,7 @@ static void test_step_pgm_diag(void) vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, __LC_PGM_NEW_PSW, new_psw); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390/memop.c index f2df7416be84..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -16,6 +16,7 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" enum mop_target { LOGICAL, @@ -226,9 +227,6 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CHECK_N_DO(f, ...) 
({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c index 357943f2bea8..b58f75b381e5 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) { uint64_t eval_reg; - vcpu_get_reg(vcpu, id, &eval_reg); + eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index bba0d9a6dcc8..bba0d9a6dcc8 100644 --- a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c index 53def355ccba..53def355ccba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 7a742a673b7c..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -9,9 +9,8 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) @@ -151,7 +150,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0) * instead. 
* In order to skip these tests we detect this inside the guest */ - skip = tests[*i].addr < (void *)4096 && + skip = tests[*i].addr < (void *)PAGE_SIZE && tests[*i].expected != TRANSL_UNAVAIL && !mapped_0; if (!skip) { diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c new file mode 100644 index 000000000000..50bc1c38225a --- /dev/null +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -0,0 +1,798 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test code for the s390x kvm ucontrol interface + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ +#include "debug_print.h" +#include "kselftest_harness.h" +#include "kvm_util.h" +#include "processor.h" +#include "sie.h" + +#include <linux/capability.h> +#include <linux/sizes.h> + +#define PGM_SEGMENT_TRANSLATION 0x10 + +#define VM_MEM_SIZE (4 * SZ_1M) +#define VM_MEM_EXT_SIZE (2 * SZ_1M) +#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) + +/* so directly declare capget to check caps without libcap */ +int capget(cap_user_header_t header, cap_user_data_t data); + +/** + * In order to create user controlled virtual machines on S390, + * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL + * as privileged user (SYS_ADMIN). 
+ */ +void require_ucontrol_admin(void) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + int rc; + + rc = capget(&hdr, data); + TEST_ASSERT_EQ(0, rc); + TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); +} + +/* Test program setting some registers and looping */ +extern char test_gprs_asm[]; +asm("test_gprs_asm:\n" + "xgr %r0, %r0\n" + "lgfi %r1,1\n" + "lgfi %r2,2\n" + "lgfi %r3,3\n" + "lgfi %r4,4\n" + "lgfi %r5,5\n" + "lgfi %r6,6\n" + "lgfi %r7,7\n" + "0:\n" + " diag 0,0,0x44\n" + " ahi %r0,1\n" + " j 0b\n" +); + +/* Test program manipulating memory */ +extern char test_mem_asm[]; +asm("test_mem_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " xgr %r1,%r1\n" + " l %r1,0(%r5,%r6)\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +/* Test program manipulating storage keys */ +extern char test_skey_asm[]; +asm("test_skey_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " sske %r1,%r6\n" + " xgr %r1,%r1\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " rrbe %r1,%r6\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +FIXTURE(uc_kvm) +{ + struct kvm_s390_sie_block *sie_block; + struct kvm_run *run; + uintptr_t base_gpa; + uintptr_t code_gpa; + uintptr_t base_hva; + uintptr_t code_hva; + int kvm_run_size; + vm_paddr_t pgd; + void *vm_mem; + int vcpu_fd; + int kvm_fd; + int vm_fd; +}; + +/** + * create VM with single vcpu, map kvm_run and SIE control block for easy access + */ +FIXTURE_SETUP(uc_kvm) +{ + struct kvm_s390_vm_cpu_processor info; + int rc; + + require_ucontrol_admin(); + + self->kvm_fd = open_kvm_dev_path_or_exit(); + self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(self->vm_fd, 0); + + kvm_device_attr_get(self->vm_fd, 
KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info); + TH_LOG("create VM 0x%llx", info.cpuid); + + self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(self->vcpu_fd, 0); + + self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) + TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); + self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd); + /** + * For virtual cpus that have been created with S390 user controlled + * virtual machines, the resulting vcpu fd can be memory mapped at page + * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of + * the virtual cpu's hardware control block. + */ + self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd, + KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); + + TH_LOG("VM created %p %p", self->run, self->sie_block); + + self->base_gpa = 0; + self->code_gpa = self->base_gpa + (3 * SZ_1M); + + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); + ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); + self->base_hva = (uintptr_t)self->vm_mem; + self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; + struct kvm_s390_ucas_mapping map = { + .user_addr = self->base_hva, + .vcpu_addr = self->base_gpa, + .length = VM_MEM_SIZE, + }; + TH_LOG("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); + ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", + rc, strerror(errno)); + + TH_LOG("page in %p", (void *)self->base_gpa); + rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); + ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", + (void *)self->base_hva, rc, strerror(errno)); + + self->sie_block->cpuflags &= ~CPUSTAT_STOPPED; +} + +FIXTURE_TEARDOWN(uc_kvm) +{ + kvm_munmap(self->sie_block, 
PAGE_SIZE); + kvm_munmap(self->run, self->kvm_run_size); + close(self->vcpu_fd); + close(self->vm_fd); + close(self->kvm_fd); + free(self->vm_mem); +} + +TEST_F(uc_kvm, uc_sie_assertions) +{ + /* assert interception of Code 08 (Program Interruption) is set */ + EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); +} + +TEST_F(uc_kvm, uc_attr_mem_limit) +{ + u64 limit; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_MEM_CTRL, + .attr = KVM_S390_VM_MEM_LIMIT_SIZE, + .addr = (u64)&limit, + }; + int rc; + + rc = ioctl(self->vm_fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + EXPECT_EQ(~0UL, limit); + + /* assert set not supported */ + rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_no_dirty_log) +{ + struct kvm_dirty_log dlog; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/** + * Assert HPAGE CAP cannot be enabled on UCONTROL VM + */ +TEST(uc_cap_hpage) +{ + int rc, kvm_fd, vm_fd, vcpu_fd; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_S390_HPAGE_1M, + }; + + require_ucontrol_admin(); + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(vm_fd, 0); + + /* assert hpages are not supported on ucontrol vm */ + rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); + EXPECT_EQ(0, rc); + + /* + * Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm. + * KVM_ENABLE_CAP takes a pointer to struct kvm_enable_cap. + */ + rc = ioctl(vm_fd, KVM_ENABLE_CAP, &cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); + + /* assert HPAGE CAP is rejected after vCPU creation */ + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(vcpu_fd, 0); + rc = ioctl(vm_fd, KVM_ENABLE_CAP, &cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBUSY, errno); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* calculate host virtual addr from guest 
physical addr */ +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +{ + return (void *)(self->base_hva - self->base_gpa + gpa); +} + +/* map / make additional memory available */ +static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas map %p %p 0x%llx\n", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); +} + +/* unmap previously mapped memory */ +static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas unmap %p %p 0x%llx\n", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); +} + +/* handle ucontrol exit by mapping the accessed segment */ +static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + u64 seg_addr; + int rc; + + TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + switch (run->s390_ucontrol.pgm_code) { + case PGM_SEGMENT_TRANSLATION: + seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); + pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", + run->s390_ucontrol.trans_exc_code, seg_addr); + /* map / make additional memory available */ + rc = uc_map_ext(self, seg_addr, SZ_1M); + TEST_ASSERT_EQ(0, rc); + break; + default: + TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); + } +} + +/* + * Enable storage key handling for the guest: + * clear CPUSTAT_KSS in the SIE block and stop intercepting the + * ISKE, SSKE and RRBE instructions + */ +static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* disable KSS */ + 
sie_block->cpuflags &= ~CPUSTAT_KSS; + /* disable skey inst interception */ + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + +/* + * Handle the instruction intercept + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + int ilen = insn_length(sie_block->ipa >> 8); + struct kvm_run *run = self->run; + + switch (run->s390_sieic.ipa) { + case 0xB229: /* ISKE */ + case 0xB22b: /* SSKE */ + case 0xB22a: /* RRBE */ + uc_skey_enable(self); + + /* rewind to reexecute intercepted instruction */ + run->psw_addr = run->psw_addr - ilen; + pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); + return true; + default: + return false; + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_run *run = self->run; + + /* check SIE interception code */ + pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", + run->s390_sieic.icptcode, + run->s390_sieic.ipa, + run->s390_sieic.ipb); + switch (run->s390_sieic.icptcode) { + case ICPT_INST: + /* end execution in caller on intercepted instruction */ + pr_info("sie instruction interception\n"); + return uc_handle_insn_ic(self); + case ICPT_KSS: + uc_skey_enable(self); + return true; + case ICPT_OPEREXC: + /* operation exception */ + TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); + default: + TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); + } + return true; +} + +/* verify VM state on exit */ +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + + switch (run->exit_reason) { + case KVM_EXIT_S390_UCONTROL: + /** check program interruption code + * handle page fault --> ucas map 
+ */ + uc_handle_exit_ucontrol(self); + break; + case KVM_EXIT_S390_SIEIC: + return uc_handle_sieic(self); + default: + pr_info("exit_reason %2d not handled\n", run->exit_reason); + } + return true; +} + +/* run the VM until interrupted */ +static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) +{ + int rc; + + rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); + print_run(self->run, self->sie_block); + print_regs(self->run); + pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); + return rc; +} + +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* assert vm was interrupted by diag 0x0044 */ + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_ASSERT_EQ(0x8300, sie_block->ipa); + TEST_ASSERT_EQ(0x440000, sie_block->ipb); +} + +TEST_F(uc_kvm, uc_no_user_region) +{ + struct kvm_userspace_memory_region region = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + struct kvm_userspace_memory_region2 region2 = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + + /* both memory region ioctls are expected to fail with EEXIST or EINVAL */ + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region)); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION", + strerror(errno), errno); + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2)); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2", + strerror(errno), errno); +} + +TEST_F(uc_kvm, uc_map_unmap) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + const u64 disp = 1; + int rc; + + /* copy test_mem_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + 
&test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* set register content for test_mem_asm to access not mapped memory*/ + sync_regs->gprs[1] = 0x55; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = VM_MEM_SIZE + disp; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* run and expect to fail with ucontrol pic segment translation */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); + + /* fail to map memory with not segment aligned address */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); + ASSERT_GT(0, rc) + TH_LOG("ucas map for non segment address should fail but didn't; " + "result %d not expected, %s", rc, strerror(errno)); + + /* map / make additional memory available */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* assert registers and memory are in expected state */ + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(0x55, sync_regs->gprs[1]); + ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); + + /* unmap and run loop again */ + rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, 
run->s390_ucontrol.pgm_code); + /* handle ucontrol exit and remap memory after previous map and unmap */ + ASSERT_EQ(true, uc_handle_exit(self)); +} + +TEST_F(uc_kvm, uc_gprs) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + struct kvm_regs regs = {}; + + /* Set registers to values that are different from the ones that we expect below */ + for (int i = 0; i < 8; i++) + sync_regs->gprs[i] = 8; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* copy test_gprs_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* run and expect interception of diag 44 */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* Retrieve and check guest register values (ioctl copy and sync_regs copy) */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs)); + for (int i = 0; i < 8; i++) { + ASSERT_EQ(i, regs.gprs[i]); + ASSERT_EQ(i, sync_regs->gprs[i]); + } + + /* run and expect interception of diag 44 again */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* check continued increment of register 0 value */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs)); + ASSERT_EQ(1, regs.gprs[0]); + ASSERT_EQ(1, sync_regs->gprs[0]); +} + +TEST_F(uc_kvm, uc_skey) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + struct kvm_run *run = self->run; + const u8 skeyvalue = 0x34; + + /* copy test_skey_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, 
&test_skey_asm, PAGE_SIZE); + + /* set register content for test_skey_asm to access not mapped memory */ + sync_regs->gprs[1] = skeyvalue; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = test_vaddr; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(true, uc_handle_exit(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + + /* + * Bail out and skip the test after uc_skey_enable was executed but iske + * is still intercepted. Instructions are not handled by the kernel. + * Thus there is no need to test this here. + */ + TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); + TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_REQUIRE(sie_block->ipa != 0xb22b); + + /* SSKE + ISKE contd. 
*/ + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* RRBE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + /* assert R reset but rest of skey unchanged */ + ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); + ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); + uc_assert_diag44(self); +} + +static char uc_flic_b[PAGE_SIZE]; +static struct kvm_s390_io_adapter uc_flic_ioa = { .id = 0 }; +static struct kvm_s390_io_adapter_req uc_flic_ioam = { .id = 0 }; +static struct kvm_s390_ais_req uc_flic_asim = { .isc = 0 }; +static struct kvm_s390_ais_all uc_flic_asima = { .simm = 0 }; +static struct uc_flic_attr_test { + char *name; + struct kvm_device_attr a; + int hasrc; + int geterrno; + int seterrno; +} uc_flic_attr_tests[] = { + { + .name = "KVM_DEV_FLIC_GET_ALL_IRQS", + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_GET_ALL_IRQS, + .addr = (u64)&uc_flic_b, + .attr = PAGE_SIZE, + }, + }, + { + .name = "KVM_DEV_FLIC_ENQUEUE", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_ENQUEUE, }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IRQS", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_CLEAR_IRQS, }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_REGISTER", + .geterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_REGISTER, + .addr = (u64)&uc_flic_ioa, + }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_MODIFY", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_MODIFY, + .addr = (u64)&uc_flic_ioam, + .attr = sizeof(uc_flic_ioam), + }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IO_IRQ", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_CLEAR_IO_IRQ, + .attr = 32, + }, + }, + { + .name = "KVM_DEV_FLIC_AISM", + .geterrno = EINVAL, + .seterrno = ENOTSUP, + .a = { + .group = 
KVM_DEV_FLIC_AISM, + .addr = (u64)&uc_flic_asim, + }, + }, + { + .name = "KVM_DEV_FLIC_AIRQ_INJECT", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_AIRQ_INJECT, }, + }, + { + .name = "KVM_DEV_FLIC_AISM_ALL", + .geterrno = ENOTSUP, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM_ALL, + .addr = (u64)&uc_flic_asima, + .attr = sizeof(uc_flic_asima), + }, + }, + { + .name = "KVM_DEV_FLIC_APF_ENABLE", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_ENABLE, }, + }, + { + .name = "KVM_DEV_FLIC_APF_DISABLE_WAIT", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_DISABLE_WAIT, }, + }, +}; + +TEST_F(uc_kvm, uc_flic_attrs) +{ + struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC }; + struct kvm_device_attr attr; + u64 value; + int rc, i; + + rc = ioctl(self->vm_fd, KVM_CREATE_DEVICE, &cd); + ASSERT_EQ(0, rc) TH_LOG("create device failed with err %s (%i)", + strerror(errno), errno); + + for (i = 0; i < ARRAY_SIZE(uc_flic_attr_tests); i++) { + TH_LOG("test %s", uc_flic_attr_tests[i].name); + attr = (struct kvm_device_attr) { + .group = uc_flic_attr_tests[i].a.group, + .attr = uc_flic_attr_tests[i].a.attr, + .addr = uc_flic_attr_tests[i].a.addr, + }; + if (attr.addr == 0) + attr.addr = (u64)&value; + + rc = ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(uc_flic_attr_tests[i].hasrc, !!rc) + TH_LOG("expected dev attr missing %s", + uc_flic_attr_tests[i].name); + + rc = ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].geterrno, !!rc) + TH_LOG("get dev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].geterrno) + EXPECT_EQ(uc_flic_attr_tests[i].geterrno, errno) + TH_LOG("get dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + + rc = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].seterrno, !!rc) + TH_LOG("set sev 
attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].seterrno) + EXPECT_EQ(uc_flic_attr_tests[i].seterrno, errno) + TH_LOG("set dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + } + + close(cd.fd); +} + +TEST_F(uc_kvm, uc_set_gsi_routing) +{ + struct kvm_irq_routing *routing = kvm_gsi_routing_create(); + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + .u.adapter = (struct kvm_irq_routing_s390_adapter) { + .ind_addr = 0, + }, + }; + int rc; + + routing->entries[0] = ue; + routing->nr = 1; + rc = ioctl(self->vm_fd, KVM_SET_GSI_ROUTING, routing); + ASSERT_EQ(-1, rc) TH_LOG("err %s (%i)", strerror(errno), errno); + ASSERT_EQ(EINVAL, errno) TH_LOG("err %s (%i)", strerror(errno), errno); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c new file mode 100644 index 000000000000..714906c1d12a --- /dev/null +++ b/tools/testing/selftests/kvm/s390/user_operexec.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test operation exception forwarding. + * + * Copyright IBM Corp. 
2025 + * + * Authors: + * Janosch Frank <frankja@linux.ibm.com> + */ +#include "kselftest.h" +#include "kvm_util.h" +#include "test_util.h" +#include "sie.h" + +#include <linux/kvm.h> + +static void guest_code_instr0(void) +{ + asm(".word 0x0000"); +} + +static void test_user_instr0(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); + + kvm_vm_free(vm); +} + +static void guest_code_user_operexec(void) +{ + asm(".word 0x0807"); +} + +static void test_user_operexec(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); + + /* + * Since user_operexec is the superset it can be used for the + * 0 instruction. 
+ */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); + + kvm_vm_free(vm); +} + +/* combine user_instr0 and user_operexec */ +static void test_user_operexec_combined(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); + + /* Reverse enablement order */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); +} + +/* + * Run all tests above. + * + * Enablement after VCPU has been added is automatically tested since + * we enable the capability after VCPU creation. 
+ */ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "instr0", test_user_instr0 }, + { "operexec", test_user_operexec }, + { "operexec_combined", test_user_operexec_combined}, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0)); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bb8002084f52..7fe427ff9b38 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -17,9 +17,9 @@ #include <processor.h> /* - * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a - * 2MB sized and aligned region so that the initial region corresponds to - * exactly one large page. + * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB + * sized and aligned region so that the initial region corresponds to exactly + * one large page. */ #define MEM_REGION_SIZE 0x200000 @@ -175,7 +175,7 @@ static void guest_code_move_memory_region(void) GUEST_DONE(); } -static void test_move_memory_region(void) +static void test_move_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -184,6 +184,9 @@ static void test_move_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); /* @@ -232,7 +235,7 @@ static void guest_code_delete_memory_region(void) * in the guest will never succeed, and so isn't an option. 
*/ memset(&idt, 0, sizeof(idt)); - __asm__ __volatile__("lidt %0" :: "m"(idt)); + set_idt(&idt); GUEST_SYNC(0); @@ -266,7 +269,7 @@ static void guest_code_delete_memory_region(void) GUEST_ASSERT(0); } -static void test_delete_memory_region(void) +static void test_delete_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -276,6 +279,9 @@ static void test_delete_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + /* Delete the memory region, the guest should not die. */ vm_mem_region_delete(vm, MEM_REGION_SLOT); wait_for_vcpu(); @@ -344,7 +350,7 @@ static void test_invalid_memory_region_flags(void) struct kvm_vm *vm; int r, i; -#if defined __aarch64__ || defined __riscv || defined __x86_64__ +#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__ supported_flags |= KVM_MEM_READONLY; #endif @@ -427,10 +433,10 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + + mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); for (slot = 0; slot < max_mem_slots; slot++) @@ -440,9 +446,8 @@ static void test_add_max_memory_regions(void) mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ - mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem_extra != 
MAP_FAILED, "Failed to mmap() host"); + mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, (uint64_t)max_mem_slots * MEM_REGION_SIZE, @@ -450,8 +455,8 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); - munmap(mem_extra, MEM_REGION_SIZE); + kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); + kvm_munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } @@ -547,18 +552,72 @@ static void test_add_overlapping_private_memory_regions(void) close(memfd); kvm_vm_free(vm); } + +static void guest_code_mmio_during_vectoring(void) +{ + const struct desc_ptr idt_desc = { + .address = MEM_REGION_GPA, + .size = 0xFFF, + }; + + set_idt(&idt_desc); + + /* Generate a #GP by dereferencing a non-canonical address */ + *((uint8_t *)NONCANONICAL) = 0x1; + + GUEST_ASSERT(0); +} + +/* + * This test points the IDT descriptor base to an MMIO address. It should cause + * a KVM internal error when an event occurs in the guest. 
+ */ +static void test_mmio_during_vectoring(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + u64 expected_gpa; + + pr_info("Testing MMIO during vectoring error handling\n"); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1); + + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV, + "Unexpected suberror = %d", vcpu->run->internal.suberror); + TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d", + run->internal.ndata); + + /* The reported GPA should be IDT base + offset of the GP vector */ + expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry); + + TEST_ASSERT(run->internal.data[3] == expected_gpa, + "Unexpected GPA = %llx (expected %lx)", + vcpu->run->internal.data[3], expected_gpa); + + kvm_vm_free(vm); +} #endif int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; + int j, disable_slot_zap_quirk = 0; + if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL) + disable_slot_zap_quirk = 1; /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. */ test_zero_memory_regions(); + test_mmio_during_vectoring(); #endif test_invalid_memory_region_flags(); @@ -579,13 +638,17 @@ int main(int argc, char *argv[]) else loops = 10; - pr_info("Testing MOVE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_move_memory_region(); + for (j = 0; j <= disable_slot_zap_quirk; j++) { + pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? 
"disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_move_memory_region(!!j); - pr_info("Testing DELETE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_delete_memory_region(); + pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_delete_memory_region(!!j); + } #endif return 0; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index a8d3afa0b86b..8edc1fca345b 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -118,7 +118,7 @@ static int64_t smccc(uint32_t func, uint64_t arg) { struct arm_smccc_res res; - smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); + do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -269,9 +269,8 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); - unsigned long enabled; + unsigned long enabled = vcpu_get_reg(vcpu, id); - vcpu_get_reg(vcpu, id, &enabled); TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); return enabled; diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index 903940c54d2d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -86,6 +86,8 @@ static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) static void check_xtile_info(void) { + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); @@ -122,29 +124,12 @@ static void set_tilecfg(struct tile_config *cfg) } } -static void init_regs(void) -{ - uint64_t cr4, xcr0; - - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); - - /* turn 
on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xcr0 = xgetbv(0); - xcr0 |= XFEATURE_MASK_XTILE; - xsetbv(0x0, xcr0); - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); -} - static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { - init_regs(); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); GUEST_SYNC(1); diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..8b15a13df939 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs. 
+ */ + +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <asm/msr-index.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +{ + uint64_t data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel. 
+ */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *nested_test_data) +{ + guest_read_aperf_mperf(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(nested_test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(nested_test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +/* + * Guest code for the VM that main() creates before enabling + * KVM_X86_DISABLE_EXITS_APERFMPERF: reads of both IA32_APERF and IA32_MPERF + * are expected to raise #GP. + */ +static void guest_no_aperfmperf(void) +{ + uint64_t msr_val; + uint8_t vector; + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + /* check MPERF as well, not APERF a second time */ + vector = rdmsr_safe(MSR_IA32_MPERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + uint64_t host_aperf_before, host_mperf_before; + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int msr_fd, cpu, i; + + /* Sanity check that APERF/MPERF are unsupported by default. */ + vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + kvm_vm_free(vm); + + cpu = pin_self_to_any_cpu(); + + msr_fd = open_dev_msr(cpu); + + /* + * This test requires a non-standard VM initialization, because + * KVM_ENABLE_CAP cannot be used on a VM file descriptor after + * a VCPU has been created. 
+ */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + uint64_t host_aperf_after, host_mperf_after; + uint64_t guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} diff --git 
a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c new file mode 100644 index 000000000000..f8916bb34405 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Intel Corporation + * + * Verify KVM correctly emulates the APIC bus frequency when the VMM configures + * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by + * programming TMICT (timer initial count) to the largest value possible (so + * that the timer will not expire during the test). Then, after an arbitrary + * amount of time has elapsed, verify TMCCT (timer current count) is within 5% + * of the expected value based on the time elapsed, the APIC bus frequency, and + * the programmed TDCR (timer divide configuration register). + */ + +#include "apic.h" +#include "test_util.h" + +/* + * Possible TDCR values with matching divide count. Used to modify APIC + * timer frequency. + */ +static const struct { + const uint32_t tdcr; + const uint32_t divide_count; +} tdcrs[] = { + {0x0, 2}, + {0x1, 4}, + {0x2, 8}, + {0x3, 16}, + {0x8, 32}, + {0x9, 64}, + {0xa, 128}, + {0xb, 1}, +}; + +static bool is_x2apic; + +static void apic_enable(void) +{ + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); +} + +static uint32_t apic_read_reg(unsigned int reg) +{ + return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); +} + +static void apic_write_reg(unsigned int reg, uint32_t val) +{ + if (is_x2apic) + x2apic_write_reg(reg, val); + else + xapic_write_reg(reg, val); +} + +static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +{ + uint64_t tsc_hz = guest_tsc_khz * 1000; + const uint32_t tmict = ~0u; + uint64_t tsc0, tsc1, freq; + uint32_t tmcct; + int i; + + apic_enable(); + + /* + * Setup one-shot timer. The vector does not matter because the + * interrupt should not fire. 
+ */ + apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED); + + for (i = 0; i < ARRAY_SIZE(tdcrs); i++) { + apic_write_reg(APIC_TDCR, tdcrs[i].tdcr); + apic_write_reg(APIC_TMICT, tmict); + + tsc0 = rdtsc(); + udelay(delay_ms * 1000); + tmcct = apic_read_reg(APIC_TMCCT); + tsc1 = rdtsc(); + + /* + * Stop the timer _after_ reading the current, final count, as + * writing the initial counter also modifies the current count. + */ + apic_write_reg(APIC_TMICT, 0); + + freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0); + /* Check if measured frequency is within 5% of configured frequency. */ + __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100, + "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu", + freq, apic_hz * 95 / 100, apic_hz * 105 / 100, + apic_hz, tdcrs[i].divide_count, tsc_hz); + } + + GUEST_DONE(); +} + +static void test_apic_bus_clock(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } + } +} + +static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, + bool x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int ret; + + is_x2apic = x2apic; + + vm = vm_create(1); + + sync_global_to_guest(vm, is_x2apic); + + vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); + vcpu_args_set(vcpu, 2, apic_hz, delay_ms); + + ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + TEST_ASSERT(ret < 0 && errno == EINVAL, + "Setting of APIC bus frequency after vCPU is created should fail."); + + if (!is_x2apic) + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + 
test_apic_bus_clock(vcpu); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name); + puts(""); + printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n"); + printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + /* + * Arbitrarily default to 25MHz for the APIC bus frequency, which is + * different enough from the default 1GHz to be interesting. + */ + uint64_t apic_hz = 25 * 1000 * 1000; + uint64_t delay_ms = 100; + int opt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); + + while ((opt = getopt(argc, argv, "d:f:h")) != -1) { + switch (opt) { + case 'f': + apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000; + break; + case 'd': + delay_ms = atoi_positive("Delay in milliseconds", optarg); + break; + case 'h': + default: + help(argv[0]); + exit(KSFT_SKIP); + } + } + + run_apic_bus_clock_test(apic_hz, delay_ms, false); + run_apic_bus_clock_test(apic_hz, delay_ms, true); +} diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index 8c579ce714e9..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -12,17 +12,16 @@ #include "kvm_util.h" #include "processor.h" -/* CPUIDs known to differ */ -struct { - u32 function; - u32 index; -} mangled_cpuids[] = { - /* - * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR, - * which are not controlled for by this test. 
- */ - {.function = 0xd, .index = 0}, - {.function = 0xd, .index = 1}, +struct cpuid_mask { + union { + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; + u32 regs[4]; + }; }; static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) @@ -56,17 +55,29 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_DONE(); } -static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie) +static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) { - int i; - - for (i = 0; i < sizeof(mangled_cpuids); i++) { - if (mangled_cpuids[i].function == entrie->function && - mangled_cpuids[i].index == entrie->index) - return true; + struct cpuid_mask mask; + + memset(&mask, 0xff, sizeof(mask)); + + switch (entry->function) { + case 0x1: + mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); + break; + case 0x7: + mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); + break; + case 0xd: + /* + * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current + * XCR0 and IA32_XSS MSR values. + */ + if (entry->index < 2) + mask.ebx = 0; + break; } - - return false; + return mask; } static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, @@ -79,6 +90,8 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); for (i = 0; i < cpuid1->nent; i++) { + struct cpuid_mask mask; + e1 = &cpuid1->entries[i]; e2 = &cpuid2->entries[i]; @@ -88,15 +101,19 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); - if (is_cpuid_mangled(e1)) - continue; + /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. 
*/ + mask = get_const_cpuid_mask(e1); - TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx && - e1->ecx == e2->ecx && e1->edx == e2->edx, + TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && + (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && + (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && + (e1->edx & mask.edx) == (e2->edx & mask.edx), "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", e1->function, e1->index, - e1->eax, e1->ebx, e1->ecx, e1->edx, - e2->eax, e2->ebx, e2->ecx, e2->edx); + e1->eax & mask.eax, e1->ebx & mask.ebx, + e1->ecx & mask.ecx, e1->edx & mask.edx, + e2->eax & mask.eax, e2->ebx & mask.ebx, + e2->ecx & mask.ecx, e2->edx & mask.edx); } } diff --git a/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c new file mode 100644 index 000000000000..28cc66454601 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CR4 and CPUID sync test + * + * Copyright 2018, Red Hat, Inc. and/or its affiliates. + * + * Author: + * Wei Huang <wei@redhat.com> + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define MAGIC_HYPERCALL_PORT 0x80 + +static void guest_code(void) +{ + u32 regs[4] = { + [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, + [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, + }; + + /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ + GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); + + /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + /* + * Notify hypervisor to clear CR4.OSXSAVE, do CPUID and save output, + * and then restore CR4. Do this all in assembly to ensure no AVX + * instructions are executed while OSXSAVE=0. 
+ */ + asm volatile ( + "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" + "cpuid\n\t" + "mov %%rdi, %%cr4\n\t" + : "+a" (regs[KVM_CPUID_EAX]), + "=b" (regs[KVM_CPUID_EBX]), + "+c" (regs[KVM_CPUID_ECX]), + "=d" (regs[KVM_CPUID_EDX]) + : "D" (get_cr4()) + ); + + /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ + GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); + + /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_sregs sregs; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (1) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && + vcpu->run->io.direction == KVM_EXIT_IO_OUT) { + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vcpu, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vcpu, &sregs); + continue; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index f6b295e0b2d2..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c @@ -47,15 +47,18 @@ static void guest_code(void) /* * Single step test, covers 2 basic instructions and 2 emulated * - * Enable interrupts during the single stepping to see that - * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ + * Enable interrupts during the single stepping to see that pending + * interrupt we raised is not handled due to 
KVM_GUESTDBG_BLOCKIRQ. + * + * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler + * exits to userspace due to single-step being enabled. */ asm volatile("ss_start: " "sti\n\t" "xor %%eax,%%eax\n\t" "cpuid\n\t" - "movl $0x1a0,%%ecx\n\t" - "rdmsr\n\t" + "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" + "wrmsr\n\t" "cli\n\t" : : : "eax", "ebx", "ecx", "edx"); @@ -163,7 +166,7 @@ int main(void) /* Test single step */ target_rip = CAST_TO_RIP(ss_start); target_dr6 = 0xffff4ff0ULL; - for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { + for (i = 0; i < ARRAY_SIZE(ss_size); i++) { target_rip += ss_size[i]; memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..b0d2b04a7ff2 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -41,9 +41,9 @@ struct kvm_page_stats { static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) { - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->pages_4k = vm_get_stat(vm, pages_4k); + stats->pages_2m = vm_get_stat(vm, pages_2m); + stats->pages_1g = vm_get_stat(vm, pages_1g); stats->hugepages = stats->pages_2m + stats->pages_1g; pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c index 81055476d394..81055476d394 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c diff --git 
a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c new file mode 100644 index 000000000000..8926cfe0e209 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* + * Execute a fastop() instruction, with or without forced emulation. BT bit 0 + * to set RFLAGS.CF based on whether or not the input is even or odd, so that + * instructions like ADC and SBB are deterministic. + */ +#define fastop(__insn) \ + "bt $0, %[bt_val]\n\t" \ + __insn "\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" + +#define flags_constraint(flags_val) [flags]"=r"(flags_val) +#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val) + +#define guest_execute_fastop_1(FEP, insn, __val, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %[val]") \ + : [val]"+r"(__val), flags_constraint(__flags) \ + : bt_constraint(__val) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_1(insn, type_t, __val) \ +({ \ + type_t val = __val, ex_val = __val, input = __val; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_1("", insn, ex_val, ex_flags); \ + guest_execute_fastop_1(KVM_FEP, insn, val, flags); \ + \ + __GUEST_ASSERT(val == ex_val, \ + "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \ + (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)input, flags); \ +}) + +#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : [input]"r"(__input), bt_constraint(__output) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_2(insn, type_t, __val1, __val2) \ +({ \ + type_t input = __val1, input2 = __val2, output = __val2, ex_output = 
__val2; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \ + guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \ + (uint64_t)ex_output, insn, (uint64_t)input, \ + (uint64_t)input2, (uint64_t)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \ +}) + +#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : "c"(__shift), bt_constraint(__output) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ +({ \ + type_t output = __val2, ex_output = __val2, input = __val2; \ + uint8_t shift = __val1; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \ + guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + (uint64_t)ex_output, insn, shift, (uint64_t)input, \ + (uint64_t)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + ex_flags, insn, shift, (uint64_t)input, flags); \ +}) + +#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ + : "+a"(__a), "+d"(__d), flags_constraint(__flags), \ + KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : [denom]"rm"(__rm), bt_constraint(__rm) \ + : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define guest_test_fastop_div(insn, type_t, __val1, __val2) \ +({ \ + type_t _a = __val1, _d = __val1, rm = __val2; \ + type_t 
a = _a, d = _d, ex_a = _a, ex_d = _d; \ + uint64_t flags, ex_flags; \ + uint8_t v, ex_v; \ + \ + ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ + v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \ + \ + GUEST_ASSERT_EQ(v, ex_v); \ + __GUEST_ASSERT(v == ex_v, \ + "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \ + ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \ + __GUEST_ASSERT(a == ex_a && d == ex_d, \ + "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\ + (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \ + (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \ + __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \ + "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\ +}) + +static const uint64_t vals[] = { + 0, + 1, + 2, + 4, + 7, + 0x5555555555555555, + 0xaaaaaaaaaaaaaaaa, + 0xfefefefefefefefe, + 0xffffffffffffffff, +}; + +#define guest_test_fastops(type_t, suffix) \ +do { \ + int i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(vals); i++) { \ + guest_test_fastop_1("dec" suffix, type_t, vals[i]); \ + guest_test_fastop_1("inc" suffix, type_t, vals[i]); \ + guest_test_fastop_1("neg" suffix, type_t, vals[i]); \ + guest_test_fastop_1("not" suffix, type_t, vals[i]); \ + \ + for (j = 0; j < ARRAY_SIZE(vals); j++) { \ + guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \ +if (sizeof(type_t) != 1) { \ + guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bts" suffix, type_t, 
vals[i], vals[j]); \ + guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \ +} \ + guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \ + } \ + } \ +} while (0) + +static void guest_code(void) +{ + guest_test_fastops(uint8_t, "b"); + guest_test_fastops(uint16_t, "w"); + guest_test_fastops(uint32_t, "l"); + guest_test_fastops(uint64_t, "q"); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. 
+ */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. + */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? 
"" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index 762628f7d4ba..762628f7d4ba 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index 37b1a9f52864..37b1a9f52864 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c index 10b1b0ba374e..10b1b0ba374e 100644 --- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c diff --git 
a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index e058bc676cd6..e058bc676cd6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4f5881d4ef66..3c21af811d8f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -22,32 +22,19 @@ static void guest_code(void) { } -static bool smt_possible(void) -{ - char buf[16]; - FILE *f; - bool res = true; - - f = fopen("/sys/devices/system/cpu/smt/control", "r"); - if (f) { - if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { - if (!strncmp(buf, "forceoff", 8) || - !strncmp(buf, "notsupported", 12)) - res = false; - } - fclose(f); - } - - return res; -} - -static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_expected) +static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { + const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; + const struct kvm_cpuid2 *hv_cpuid_entries; int i; int nent_expected = 10; u32 test_val; + if (vcpu) + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + else + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); + TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " (returned %d)", @@ -58,7 +45,7 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x40000082), - "function %x is our of supported range", + "function %x is out of supported range", entry->function); TEST_ASSERT(entry->index == 0, @@ -80,12 +67,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, entry->eax, evmcs_expected ); break; + case 0x40000003: + TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)), + "\"Direct\" Synthetic 
Timers should require in-kernel APIC"); + break; case 0x40000004: test_val = entry->eax & (1UL << 18); - TEST_ASSERT(!!test_val == !smt_possible(), + TEST_ASSERT(!!test_val == !is_smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); + + TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)), + "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC"); break; case 0x4000000A: TEST_ASSERT(entry->eax & (1UL << 19), @@ -109,9 +103,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, * entry->edx); */ } + + /* + * Note, the CPUID array returned by the system-scoped helper is a one- + * time allocation, i.e. must not be freed. + */ + if (vcpu) + free((void *)hv_cpuid_entries); } -void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; @@ -129,19 +130,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int main(int argc, char *argv[]) { struct kvm_vm *vm; - const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + /* Test the vCPU ioctl without an in-kernel local APIC. 
*/ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_hv_cpuid(vcpu, false); + kvm_vm_free(vm); /* Test vCPU ioctl version */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); test_hv_cpuid_e2big(vm, vcpu); - - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, false); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, false); if (!kvm_cpu_has(X86_FEATURE_VMX) || !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { @@ -149,9 +151,7 @@ int main(int argc, char *argv[]) goto do_sys; } vcpu_enable_evmcs(vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, true); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, true); do_sys: /* Test system ioctl version */ @@ -161,9 +161,7 @@ do_sys: } test_hv_cpuid_e2big(vm, NULL); - - hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); + test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX)); out: kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index e192720bfe14..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 949e08e98f31..949e08e98f31 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c diff --git 
a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 068e9c69710d..130b9ce7e5dd 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr) if (msr->fault_expected) __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected #GP on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); else __GUEST_ASSERT(!vector, - "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected success on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); if (vector || is_write_only_msr(msr->idx)) goto done; @@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { input = pgs_gpa; - output = pgs_gpa + 4096; + output = pgs_gpa + PAGE_SIZE; } else { input = output = 0; } @@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected #UD for control '%lu', got %s", + hcall->control, ex_str(vector)); } else { __GUEST_ASSERT(!vector, - "Expected no exception for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected no exception for control '%lu', got %s", + hcall->control, ex_str(vector)); GUEST_ASSERT_EQ(res, hcall->expect); } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 22c0c124582f..ca61836c4e32 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ 
-63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* Signal sender vCPU we're ready */ ipis_rcvd[vcpu_id] = (u64)-1; - for (;;) - asm volatile("sti; hlt; cli"); + for (;;) { + safe_halt(); + cli(); + } } static void guest_ipi_handler(struct ex_regs *regs) @@ -100,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ ipi->vector = IPI_VECTOR; ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; - hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -114,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 0; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -136,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, 
pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -158,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -181,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index b987a3d79715..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -157,7 +157,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - 
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 077cd0ec3040..a3b7ce155981 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -621,7 +621,7 @@ int main(int argc, char *argv[]) for (i = 0; i < NTEST_PAGES; i++) { pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); - __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); } diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c new file mode 100644 index 000000000000..d88500c118eb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + */ +#include <linux/atomic.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "vmx.h" +#include "test_util.h" + +#define NR_BUS_LOCKS_PER_LEVEL 100 +#define CACHE_LINE_SIZE 64 + +/* + * To generate a bus lock, carve out a buffer that precisely occupies two cache + * lines and perform an atomic access that splits the two lines. 
+ */ +static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE); +static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)]; + +static void guest_generate_buslocks(void) +{ + for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++) + atomic_inc(val); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_generate_buslocks(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *test_data) +{ + guest_generate_buslocks(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + int i, bus_locks = 0; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT)); + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + run = vcpu->run; + + for (i = 0; i <= 
NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) { + struct ucall uc; + + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_IO) { + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto done; + case UCALL_SYNC: + continue; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK); + + /* + * Verify the counter is actually getting incremented, e.g. that + * KVM isn't skipping the instruction. On Intel, the exit is + * trap-like, i.e. the counter should already have been + * incremented. On AMD, it's fault-like, i.e. the counter will + * be incremented when the guest re-executes the instruction. + */ + sync_global_from_guest(vm, *val); + TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel); + + bus_locks++; + } + TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks); +done: + TEST_ASSERT_EQ(i, bus_locks); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5bc12222d87a..5bc12222d87a 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 78878b3a2725..1b805cbdb47b 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -139,10 +139,12 @@ static void test_pv_unhalt(void) struct kvm_vm *vm; struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; + int r; - pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) + return; - TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); /* KVM_PV_UNHALT test */ vm = vm_create_with_one_vcpu(&vcpu, guest_main); @@ -151,19 +153,45 @@ static 
void test_pv_unhalt(void) TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); - /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + /* Verify KVM disallows disabling exits after vCPU creation. */ + r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + TEST_ASSERT(r && errno == EINVAL, + "Disabling exits after vCPU creation didn't fail as expected"); + + kvm_vm_free(vm); + + /* Verify that KVM clears PV_UNHALT from guest CPUID. */ + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "vCPU created with PV_UNHALT set by default"); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* + * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT + * when KVM PV is not present, and DOES clear PV_UNHALT when switching + * back to the correct signature. 
+ */ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); kvm_sig_old = ent->ebx; ent->ebx = 0xdeadbeef; vcpu_set_cpuid(vcpu); - vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT cleared when using bogus KVM PV signature"); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); ent->ebx = kvm_sig_old; vcpu_set_cpuid(vcpu); TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c index 3cc4b86832fe..7e2bfb3c3f3b 100644 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c @@ -26,19 +26,37 @@ int main(int argc, char *argv[]) TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); + /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */ + if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID); + + /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail"); + } + /* Set KVM_CAP_MAX_VCPU_ID */ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); - /* Try to set KVM_CAP_MAX_VCPU_ID again */ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); - /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/ + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); 
TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); + /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32)); + TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail"); + + /* Create vCPU with id within bounds */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0); + TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed"); + + close(ret); kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..e45c028d2a7e 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -27,19 +30,27 @@ do { \ \ if (fault_wanted) \ __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected #UD on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ else \ __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected success on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == 
!(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? "cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } @@ -74,56 +72,65 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & 
CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 
0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c new file mode 100644 index 000000000000..40d918aedce6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/msrs_test.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <asm/msr-index.h> + +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" + +/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */ +#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR + +struct kvm_msr { + const struct kvm_x86_cpu_feature feature; + const struct kvm_x86_cpu_feature feature2; + const char *name; + const u64 reset_val; + const u64 write_val; + const u64 rsvd_val; + const u32 index; + const bool is_kvm_defined; +}; + +#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \ +{ \ + .index = msr, \ + .name = str, \ + .write_val = val, \ + .rsvd_val = rsvd, \ + .reset_val = reset, \ + .feature = X86_FEATURE_ ##feat, \ + .feature2 = X86_FEATURE_ ##f2, \ + .is_kvm_defined = is_kvm, \ +} + +#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \ + ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false) + +#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, reset, feat) + +#define MSR_TEST(msr, val, rsvd, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, 0, feat) + +#define MSR_TEST2(msr, val, rsvd, feat, f2) \ + ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false) + +/* + * Note, use a page aligned value for the canonical value so that the value + * is compatible with MSRs that use bits 11:0 for things other than addresses. + */ +static const u64 canonical_val = 0x123456789000ull; + +/* + * Arbitrary value with bits set in every byte, but not all bits set. 
This is + * also a non-canonical value, but that's coincidental (any 64-bit value with + * an alternating 0s/1s pattern will be non-canonical). + */ +static const u64 u64_val = 0xaaaa5555aaaa5555ull; + +#define MSR_TEST_CANONICAL(msr, feat) \ + __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat) + +#define MSR_TEST_KVM(msr, val, rsvd, feat) \ + ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true) + +/* + * The main struct must be scoped to a function due to the use of structures to + * define features. For the global structure, allocate enough space for the + * foreseeable future without getting too ridiculous, to minimize maintenance + * costs (bumping the array size every time an MSR is added is really annoying). + */ +static struct kvm_msr msrs[128]; +static int idx; + +static bool ignore_unsupported_msrs; + +static u64 fixup_rdmsr_val(u32 msr, u64 want) +{ + /* + * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM + * is supposed to emulate that behavior based on guest vendor model + * (which is the same as the host vendor model for this test). 
+ */ + if (!host_cpu_is_amd) + return want; + + switch (msr) { + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: + case MSR_TSC_AUX: + return want & GENMASK_ULL(31, 0); + default: + return want; + } +} + +static void __rdmsr(u32 msr, u64 want) +{ + u64 val; + u8 vec; + + vec = rdmsr_safe(msr, &val); + __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr); + + __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx", + want, msr, val); +} + +static void __wrmsr(u32 msr, u64 val) +{ + u8 vec; + + vec = wrmsr_safe(msr, val); + __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)", + ex_str(vec), msr, val); + __rdmsr(msr, fixup_rdmsr_val(msr, val)); +} + +static void guest_test_supported_msr(const struct kvm_msr *msr) +{ + __rdmsr(msr->index, msr->reset_val); + __wrmsr(msr->index, msr->write_val); + GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val)); + + __rdmsr(msr->index, msr->reset_val); +} + +static void guest_test_unsupported_msr(const struct kvm_msr *msr) +{ + u64 val; + u8 vec; + + /* + * KVM's ABI with respect to ignore_msrs is a mess and largely beyond + * repair, just skip the unsupported MSR tests. + */ + if (ignore_unsupported_msrs) + goto skip_wrmsr_gp; + + /* + * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are + * writable only if their associated feature is supported. Skip the + * RDMSR #GP test if the secondary feature is supported, but perform + * the WRMSR #GP test as the to-be-written value is tied to the primary + * feature. For all other MSRs, simply do nothing. 
+ */ + if (this_cpu_has(msr->feature2)) { + if (msr->index != MSR_IA32_U_CET && + msr->index != MSR_IA32_S_CET) + goto skip_wrmsr_gp; + + goto skip_rdmsr_gp; + } + + vec = rdmsr_safe(msr->index, &val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s", + msr->index, ex_str(vec)); + +skip_rdmsr_gp: + vec = wrmsr_safe(msr->index, msr->write_val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->write_val, ex_str(vec)); + +skip_wrmsr_gp: + GUEST_SYNC(0); +} + +void guest_test_reserved_val(const struct kvm_msr *msr) +{ + /* Skip reserved value checks as well, ignore_msrs is truly a mess. */ + if (ignore_unsupported_msrs) + return; + + /* + * If the CPU will truncate the written value (e.g. SYSENTER on AMD), + * expect success and a truncated value, not #GP. + */ + if (!this_cpu_has(msr->feature) || + msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) { + u8 vec = wrmsr_safe(msr->index, msr->rsvd_val); + + __GUEST_ASSERT(vec == GP_VECTOR, + "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->rsvd_val, ex_str(vec)); + } else { + __wrmsr(msr->index, msr->rsvd_val); + __wrmsr(msr->index, msr->reset_val); + } +} + +static void guest_main(void) +{ + for (;;) { + const struct kvm_msr *msr = &msrs[READ_ONCE(idx)]; + + if (this_cpu_has(msr->feature)) + guest_test_supported_msr(msr); + else + guest_test_unsupported_msr(msr); + + if (msr->rsvd_val) + guest_test_reserved_val(msr); + + GUEST_SYNC(msr->reset_val); + } +} + +static bool has_one_reg; +static bool use_one_reg; + +#define KVM_X86_MAX_NR_REGS 1 + +static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg) +{ + struct { + struct kvm_reg_list list; + u64 regs[KVM_X86_MAX_NR_REGS]; + } regs = {}; + int r, i; + + /* + * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported + * regs, then the vCPU obviously doesn't support the reg. 
+ */ + r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list); + if (!r) + return false; + + TEST_ASSERT_EQ(errno, E2BIG); + + /* + * KVM x86 is expected to support enumerating a relatively small number + * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG + * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For + * simplicity, hardcode the maximum number of regs and manually update + * the test as necessary. + */ + TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS, + "KVM reports %llu regs, test expects at most %u regs, stale test?", + regs.list.n, KVM_X86_MAX_NR_REGS); + + vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list); + for (i = 0; i < regs.list.n; i++) { + if (regs.regs[i] == reg) + return true; + } + + return false; +} + +static void host_test_kvm_reg(struct kvm_vcpu *vcpu) +{ + bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature); + u64 reset_val = msrs[idx].reset_val; + u64 write_val = msrs[idx].write_val; + u64 rsvd_val = msrs[idx].rsvd_val; + u32 reg = msrs[idx].index; + u64 val; + int r; + + if (!use_one_reg) + return; + + TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg); + + if (!has_reg) { + r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val); + TEST_ASSERT(r && errno == EINVAL, + "Expected failure on get_reg(0x%x)", reg); + rsvd_val = 0; + goto out; + } + + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, reg, val); + + vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val); + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + write_val, reg, val); + +out: + r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val); + TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val); +} + +static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val) +{ + u64 reset_val = msrs[idx].reset_val; + u32 msr = msrs[idx].index; + u64 val; + + if 
(!kvm_cpu_has(msrs[idx].feature)) + return; + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + guest_val, msr, val); + + if (use_one_reg) + vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val); + else + vcpu_set_msr(vcpu, msr, reset_val); + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + reset_val, msr, val); + + if (!has_one_reg) + return; + + val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, msr, val); +} + +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + for (;;) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + host_test_msr(vcpu, uc.args[1]); + return; + case UCALL_PRINTF: + pr_info("%s", uc.buffer); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_DONE: + TEST_FAIL("Unexpected UCALL_DONE"); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS) +{ + int i; + + for (i = 0; i < NR_VCPUS; i++) + do_vcpu_run(vcpus[i]); +} + +#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL) + +static void test_msrs(void) +{ + const struct kvm_msr __msrs[] = { + MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE, + MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING, + MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE), + MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE), + + /* + * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add + * entries for each feature so that TSC_AUX doesn't exist for + * the "unsupported" vCPU, and obviously to test both cases. 
+ */ + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID), + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP), + + MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE), + /* + * SYSENTER_{ESP,EIP} are technically non-canonical on Intel, + * but KVM doesn't emulate that behavior on emulated writes, + * i.e. this test will observe different behavior if the MSR + * writes are handed by hardware vs. KVM. KVM's behavior is + * intended (though far from ideal), so don't bother testing + * non-canonical values. + */ + MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE), + MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE), + + MSR_TEST_CANONICAL(MSR_FS_BASE, LM), + MSR_TEST_CANONICAL(MSR_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_LSTAR, LM), + MSR_TEST_CANONICAL(MSR_CSTAR, LM), + MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM), + + MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK), + + MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK), + }; + + const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE; + const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM; + + /* + * Create three vCPUs, but run them on the same task, to validate KVM's + * context switching of MSR state. 
Don't pin the task to a pCPU to + * also validate KVM's handling of cross-pCPU migration. Use the full + * set of features for the first two vCPUs, but clear all features in + * third vCPU in order to test both positive and negative paths. + */ + const int NR_VCPUS = 3; + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct kvm_vm *vm; + int i; + + kvm_static_assert(sizeof(__msrs) <= sizeof(msrs)); + kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs)); + memcpy(msrs, __msrs, sizeof(__msrs)); + + ignore_unsupported_msrs = kvm_is_ignore_msrs(); + + vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus); + + sync_global_to_guest(vm, msrs); + sync_global_to_guest(vm, ignore_unsupported_msrs); + + /* + * Clear features in the "unsupported features" vCPU. This needs to be + * done before the first vCPU run as KVM's ABI is that guest CPUID is + * immutable once the vCPU has been run. + */ + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + /* + * Don't clear LM; selftests are 64-bit only, and KVM doesn't + * honor LM=0 for MSRs that are supposed to exist if and only + * if the vCPU is a 64-bit model. Ditto for NONE; clearing a + * fake feature flag will result in false failures. + */ + if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) && + memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none))) + vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature); + } + + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + struct kvm_msr *msr = &msrs[idx]; + + if (msr->is_kvm_defined) { + for (i = 0; i < NR_VCPUS; i++) + host_test_kvm_reg(vcpus[i]); + continue; + } + + /* + * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST + * are consistent with respect to MSRs whose existence is + * enumerated via CPUID. Skip the check for FS/GS.base MSRs, + * as they aren't reported in the save/restore list since their + * state is managed via SREGS. 
+ */ + TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE || + kvm_msr_is_in_save_restore_list(msr->index) == + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)), + "%s %s in save/restore list, but %s according to CPUID", msr->name, + kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't", + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ? + "supported" : "unsupported"); + + sync_global_to_guest(vm, idx); + + vcpus_run(vcpus, NR_VCPUS); + vcpus_run(vcpus, NR_VCPUS); + } + + kvm_vm_free(vm); +} + +int main(void) +{ + has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG); + + test_msrs(); + + if (has_one_reg) { + use_one_reg = true; + test_msrs(); + } +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c index dad988351493..f001cb836bfa 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_close_while_nested - * * Copyright (C) 2019, Red Hat, Inc. 
* * Verify that nothing bad happens if a KVM user exits with open @@ -12,6 +10,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -22,6 +21,8 @@ enum { PORT_L0_EXIT = 0x2000, }; +#define L2_GUEST_STACK_SIZE 64 + static void l2_guest_code(void) { /* Exit to L0 */ @@ -29,9 +30,8 @@ static void l2_guest_code(void) : : [port] "d" (PORT_L0_EXIT) : "rax"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_vmx_code(struct vmx_pages *vmx_pages) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); @@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(0); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Prepare the VMCB for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(0); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - /* Allocate VMX pages and shared descriptors (vmx_pages). 
*/ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); for (;;) { volatile struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c new file mode 100644 index 000000000000..abc824dba04f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +enum { + SVM_F, + VMX_F, + NR_VIRTUALIZATION_FLAVORS, +}; + +struct emulated_instruction { + const char name[32]; + uint8_t opcode[15]; + uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS]; +}; + +static struct emulated_instruction instructions[] = { + { + .name = "pause", + .opcode = { 0xf3, 0x90 }, + .exit_reason = { SVM_EXIT_PAUSE, + EXIT_REASON_PAUSE_INSTRUCTION, } + }, + { + .name = "hlt", + .opcode = { 0xf4 }, + .exit_reason = { SVM_EXIT_HLT, + EXIT_REASON_HLT, } + }, +}; + +static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ +static uint8_t l2_guest_code[sizeof(kvm_fep) + 15]; +static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; + +static uint32_t get_instruction_length(struct emulated_instruction *insn) +{ + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++) + ; + + return i; +} + +static void guest_code(void *test_data) +{ + int f = this_cpu_has(X86_FEATURE_SVM) ? 
SVM_F : VMX_F; + int i; + + memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep)); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, NULL, NULL); + vmcb->save.idtr.limit = 0; + vmcb->save.rip = (u64)l2_guest_code; + + vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) | + BIT_ULL(INTERCEPT_PAUSE) | + BIT_ULL(INTERCEPT_HLT); + vmcb->control.intercept_exceptions = 0; + } else { + GUEST_ASSERT(prepare_for_vmx_operation(test_data)); + GUEST_ASSERT(load_vmcs(test_data)); + + prepare_vmcs(test_data, NULL, NULL); + GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0)); + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0)); + + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_PAUSE_EXITING | + CPU_BASED_HLT_EXITING); + } + + for (i = 0; i < ARRAY_SIZE(instructions); i++) { + struct emulated_instruction *insn = &instructions[i]; + uint32_t insn_len = get_instruction_length(insn); + uint32_t exit_insn_len; + u32 exit_reason; + + /* + * Copy the target instruction to the L2 code stream, and fill + * the remaining bytes with INT3s so that a missed intercept + * results in a consistent failure mode (SHUTDOWN). + */ + memcpy(l2_instruction, insn->opcode, insn_len); + memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + run_guest(vmcb, svm->vmcb_gpa); + exit_reason = vmcb->control.exit_code; + exit_insn_len = vmcb->control.next_rip - vmcb->save.rip; + GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction); + } else { + GUEST_ASSERT_EQ(i ? 
vmresume() : vmlaunch(), 0); + exit_reason = vmreadz(VM_EXIT_REASON); + exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN); + GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction); + } + + __GUEST_ASSERT(exit_reason == insn->exit_reason[f], + "Wanted exit_reason '0x%x' for '%s', got '0x%x'", + insn->exit_reason[f], insn->name, exit_reason); + + __GUEST_ASSERT(exit_insn_len == insn_len, + "Wanted insn_len '%u' for '%s', got '%u'", + insn_len, insn->name, exit_insn_len); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa39..3641a42934ac 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); + GUEST_ASSERT(!ctrl->int_state); } static void l1_svm_code(struct svm_test_data *svm) @@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); 
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); + GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO)); } static void l1_vmx_code(struct vmx_pages *vmx) diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c new file mode 100644 index 000000000000..a6b6da9cf7fe --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * This test verifies that L1 fails to enter L2 with an invalid CR3, and + * succeeds otherwise. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" + + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + vmcall(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = svm->vmcb->save.cr3; + svm->vmcb->save.cr3 = -1ull; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR); + + /* Now restore CR3 and make sure L2 runs successfully */ + svm->vmcb->save.cr3 = save_cr3; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + 
(EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + + /* Now restore CR3 and make sure L2 runs successfully */ + vmwrite(GUEST_CR3, save_cr3); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t guest_gva = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index 2ceb5c78c442..2839f650e5c9 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_tsc_adjust_test - * * Copyright (C) 2018, Google LLC. 
* * IA32_TSC_ADJUST test @@ -22,6 +20,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -35,6 +34,8 @@ #define TSC_ADJUST_VALUE (1ll << 32) #define TSC_OFFSET_VALUE -(1ll << 48) +#define L2_GUEST_STACK_SIZE 64 + enum { PORT_ABORT = 0x1000, PORT_REPORT, @@ -72,42 +73,47 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_guest_code(void *data) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; + /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - - /* Jump into L2. First, test failure to load guest CR3. */ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + /* + * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not + * intercepting the write so it should update L1's TSC_ADJUST. 
+ */ + if (this_cpu_has(X86_FEATURE_VMX)) { + struct vmx_pages *vmx_pages = data; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + } else { + struct svm_test_data *svm = data; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + } check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - GUEST_DONE(); } @@ -119,16 +125,19 @@ static void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t nested_gva; struct kvm_vcpu *vcpu; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); - /* Allocate VMX pages and shared descriptors (vmx_pages). 
*/ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (;;) { struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index 1759fa5cb3f2..4260c9e4f489 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" /* L2 is scaled up (from L1's perspective) by this factor */ @@ -79,7 +80,30 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC scaling for L2 */ + wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32); + + /* launch L2 */ + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint32_t control; @@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva = 0; uint64_t tsc_start, tsc_end; uint64_t tsc_khz; @@ -129,7 +161,8 @@ int main(int argc, char *argv[]) uint64_t l1_tsc_freq = 0; uint64_t 
l2_tsc_freq = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); @@ -152,8 +185,13 @@ int main(int argc, char *argv[]) printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..c0d84827f736 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) { int actual_pages_2m; - actual_pages_2m = vm_get_stat(vm, "pages_2m"); + actual_pages_2m = vm_get_stat(vm, pages_2m); TEST_ASSERT(actual_pages_2m == expected_pages_2m, "Unexpected 2m page count. Expected %d, got %d", @@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) { int actual_splits; - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + actual_splits = vm_get_stat(vm, nx_lpage_splits); TEST_ASSERT(actual_splits == expected_splits, "Unexpected NX huge page split count. 
Expected %d, got %d", diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh index caad084b8bfd..caad084b8bfd 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c index eda88080c186..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c @@ -72,8 +72,6 @@ int main(int argc, char *argv[]) } done: - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); - kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 96446134c00b..3eaa216b96c0 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -7,19 +7,86 @@ #include "pmu.h" #include "processor.h" -/* Number of LOOP instructions for the guest measurement payload. */ -#define NUM_BRANCHES 10 +/* Number of iterations of the loop for the guest measurement payload. */ +#define NUM_LOOPS 10 + +/* Each iteration of the loop retires one branch instruction. */ +#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) + +/* + * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP, + * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP. + */ +#define NUM_INSNS_PER_LOOP 6 + /* * Number of "extra" instructions that will be counted, i.e. the number of - * instructions that are needed to set up the loop and then disabled the - * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR. + * instructions that are needed to set up the loop and then disable the + * counter. 2 MOV, 2 XOR, 1 WRMSR. 
*/ -#define NUM_EXTRA_INSNS 7 -#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS) +#define NUM_EXTRA_INSNS 5 + +/* Total number of instructions retired within the measured section. */ +#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) + +/* Track which architectural events are supported by hardware. */ +static uint32_t hardware_pmu_arch_events; static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; +#define X86_PMU_FEATURE_NULL \ +({ \ + struct kvm_x86_pmu_feature feature = {}; \ + \ + feature; \ +}) + +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) +{ + return !(*(u64 *)&event); +} + +struct kvm_intel_pmu_event { + struct kvm_x86_pmu_feature gp_event; + struct kvm_x86_pmu_feature fixed_event; +}; + +/* + * Wrap the array to appease the compiler, as the macros used to construct each + * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the + * compiler often thinks the feature definitions aren't compile-time constants. + */ +static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) +{ + const struct kvm_intel_pmu_event __intel_event_to_feature[] = { + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, + /* + * Note, the fixed counter for reference cycles is NOT the same as the + * general purpose architectural event. The fixed counter explicitly + * counts at the same frequency as the TSC, whereas the GP event counts + * at a fixed, but uarch specific, frequency. Bundle them here for + * simplicity. 
+ */ + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL }, + }; + + kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); + + return __intel_event_to_feature[idx]; +} + static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, uint8_t pmu_version, @@ -29,6 +96,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, vm = vm_create_with_one_vcpu(vcpu, guest_code); sync_global_to_guest(vm, kvm_pmu_version); + sync_global_to_guest(vm, hardware_pmu_arch_events); /* * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling @@ -85,22 +153,28 @@ static uint8_t guest_get_pmu_version(void) * Sanity check that in all cases, the event doesn't count when it's disabled, * and that KVM correctly emulates the write of an arbitrary value. 
*/ -static void guest_assert_event_count(uint8_t idx, - struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr) +static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) { uint64_t count; count = _rdpmc(pmc); - if (!this_pmu_has(event)) + if (!(hardware_pmu_arch_events & BIT(idx))) goto sanity_checks; switch (idx) { case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCHES); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -110,17 +184,22 @@ static void guest_assert_event_count(uint8_t idx, fallthrough; case INTEL_ARCH_CPU_CYCLES_INDEX: case INTEL_ARCH_REFERENCE_CYCLES_INDEX: + case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX: + case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX: GUEST_ASSERT_NE(count, 0); break; case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: - GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + case INTEL_ARCH_TOPDOWN_RETIRING_INDEX: + __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, + "Expected top-down slots >= %u, got count = %lu", + NUM_INSNS_RETIRED, count); break; default: break; } sanity_checks: - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); GUEST_ASSERT_EQ(_rdpmc(pmc), count); wrmsr(pmc_msr, 0xdead); @@ -134,8 +213,8 @@ sanity_checks: * before the end of the sequence. 
* * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the - * start of the loop to force LLC references and misses, i.e. to allow testing - * that those events actually count. + * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and + * misses, i.e. to allow testing that those events actually count. * * If forced emulation is enabled (and specified), force emulation on a subset * of the measured code to verify that KVM correctly emulates instructions and @@ -145,78 +224,48 @@ sanity_checks: #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ do { \ __asm__ __volatile__("wrmsr\n\t" \ + " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ + "1:\n\t" \ + FEP "enter $0, $0\n\t" \ clflush "\n\t" \ "mfence\n\t" \ - "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \ - FEP "loop .\n\t" \ + "mov %[m], %%eax\n\t" \ + FEP "leave\n\t" \ + FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ "wrmsr\n\t" \ :: "a"((uint32_t)_value), "d"(_value >> 32), \ - "c"(_msr), "D"(_msr) \ + "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ ); \ } while (0) -#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ do { \ - wrmsr(pmc_msr, 0); \ + wrmsr(_pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \ - guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \ + guest_assert_event_count(_idx, _pmc, _pmc_msr); \ } while (0) -static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event, - uint32_t pmc, 
uint32_t pmc_msr, +static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, uint32_t ctrl_msr, uint64_t ctrl_msr_value) { - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); if (is_forced_emulation_enabled) - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); -} - -#define X86_PMU_FEATURE_NULL \ -({ \ - struct kvm_x86_pmu_feature feature = {}; \ - \ - feature; \ -}) - -static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) -{ - return !(*(u64 *)&event); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); } static void guest_test_arch_event(uint8_t idx) { - const struct { - struct kvm_x86_pmu_feature gp_event; - struct kvm_x86_pmu_feature fixed_event; - } intel_event_to_feature[] = { - [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, - [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, - /* - * Note, the fixed counter for reference cycles is NOT the same - * as the general purpose architectural event. The fixed counter - * explicitly counts at the same frequency as the TSC, whereas - * the GP event counts at a fixed, but uarch specific, frequency. - * Bundle them here for simplicity. 
- */ - [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, - [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, - }; - uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); uint32_t pmu_version = guest_get_pmu_version(); /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ @@ -234,7 +283,7 @@ static void guest_test_arch_event(uint8_t idx) else base_pmc_msr = MSR_IA32_PERFCTR0; - gp_event = intel_event_to_feature[idx].gp_event; + gp_event = intel_event_to_feature(idx).gp_event; GUEST_ASSERT_EQ(idx, gp_event.f.bit); GUEST_ASSERT(nr_gp_counters); @@ -248,14 +297,14 @@ static void guest_test_arch_event(uint8_t idx) if (guest_has_perf_global_ctrl) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); - __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i, + __guest_test_arch_event(idx, i, base_pmc_msr + i, MSR_P6_EVNTSEL0 + i, eventsel); } if (!guest_has_perf_global_ctrl) return; - fixed_event = intel_event_to_feature[idx].fixed_event; + fixed_event = intel_event_to_feature(idx).fixed_event; if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) return; @@ -263,7 +312,7 @@ static void guest_test_arch_event(uint8_t idx) wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED, + __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED, MSR_CORE_PERF_FIXED_CTR0 + i, MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); @@ 
-280,7 +329,7 @@ static void guest_test_arch_events(void) } static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t length, uint8_t unavailable_mask) + uint8_t length, uint32_t unavailable_mask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -289,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, if (!pmu_version) return; + unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit, + X86_PROPERTY_PMU_EVENTS_MASK.lo_bit); + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, pmu_version, perf_capabilities); @@ -313,13 +365,13 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ - "Expected %s on " #insn "(0x%x), got vector %u", \ - expect_gp ? "#GP" : "no fault", msr, vector) \ + "Expected %s on " #insn "(0x%x), got %s", \ + expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \ #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ - __GUEST_ASSERT(val == expected_val, \ + __GUEST_ASSERT(val == expected, \ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ - msr, expected_val, val); + msr, expected, val); static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, uint64_t expected_val) @@ -500,7 +552,7 @@ static void guest_test_fixed_counters(void) wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." 
: "+c"((int){NUM_LOOPS})); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); @@ -531,7 +583,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, static void test_intel_counters(void) { - uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); @@ -545,6 +596,26 @@ static void test_intel_counters(void) }; /* + * To keep the total runtime reasonable, test only a handful of select, + * semi-arbitrary values for the mask of unavailable PMU events. Test + * 0 (all events available) and all ones (no events available) as well + * as alternating bit sequencues, e.g. to detect if KVM is checking the + * wrong bit(s). + */ + const uint32_t unavailable_masks[] = { + 0x0, + 0xffffffffu, + 0xaaaaaaaau, + 0x55555555u, + 0xf0f0f0f0u, + 0x0f0f0f0fu, + 0xa0a0a0a0u, + 0x0a0a0a0au, + 0x50505050u, + 0x05050505u, + }; + + /* * Test up to PMU v5, which is the current maximum version defined by * Intel, i.e. is the last version that is guaranteed to be backwards * compatible with KVM's existing behavior. @@ -553,18 +624,26 @@ static void test_intel_counters(void) /* * Detect the existence of events that aren't supported by selftests. - * This will (obviously) fail any time the kernel adds support for a - * new event, but it's worth paying that price to keep the test fresh. + * This will (obviously) fail any time hardware adds support for a new + * event, but it's worth paying that price to keep the test fresh. 
*/ - TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS, + TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS, "New architectural event(s) detected; please update this test (length = %u, mask = %x)", - nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); + this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH), + this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); /* - * Force iterating over known arch events regardless of whether or not - * KVM/hardware supports a given event. + * Iterate over known arch events irrespective of KVM/hardware support + * to verify that KVM doesn't reject programming of events just because + * the *architectural* encoding is unsupported. Track which events are + * supported in hardware; the guest side will validate supported events + * count correctly, even if *enumeration* of the event is unsupported + * by KVM and/or isn't exposed to the guest. */ - nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS); + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) { + if (this_pmu_has(intel_event_to_feature(i).gp_event)) + hardware_pmu_arch_events |= BIT(i); + } for (v = 0; v <= max_pmu_version; v++) { for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { @@ -573,16 +652,7 @@ static void test_intel_counters(void) pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", v, perf_caps[i]); - /* - * To keep the total runtime reasonable, test every - * possible non-zero, non-reserved bitmap combination - * only with the native PMU version and the full bit - * vector length. - */ - if (v == pmu_version) { - for (k = 1; k < (BIT(nr_arch_events) - 1); k++) - test_arch_events(v, perf_caps[i], nr_arch_events, k); - } + /* * Test single bits for all PMU version and lengths up * the number of events +1 (to verify KVM doesn't do @@ -590,12 +660,9 @@ static void test_intel_counters(void) * host length). Explicitly test a mask of '0' and all * ones i.e. 
all events being available and unavailable. */ - for (j = 0; j <= nr_arch_events + 1; j++) { - test_arch_events(v, perf_caps[i], j, 0); - test_arch_events(v, perf_caps[i], j, 0xff); - - for (k = 0; k < nr_arch_events; k++) - test_arch_events(v, perf_caps[i], j, BIT(k)); + for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { + for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++) + test_arch_events(v, perf_caps[i], j, unavailable_masks[k]); } pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index 26b3e7efe5dd..1c5b7611db24 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -32,8 +32,8 @@ struct __kvm_pmu_event_filter { /* * This event list comprises Intel's known architectural events, plus AMD's - * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs). - * Note, AMD and Intel use the same encoding for instructions retired. + * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the + * same encoding for Instructions Retired. */ kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); @@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) do { \ uint64_t br = pmc_results.branches_retired; \ uint64_t ir = pmc_results.instructions_retired; \ + bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? 
\ + br >= NUM_BRANCHES : br == NUM_BRANCHES; \ \ - if (br && br != NUM_BRANCHES) \ + if (br && !br_matched) \ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ __func__, br, NUM_BRANCHES); \ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ @@ -353,38 +355,13 @@ static bool use_intel_pmu(void) kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t family, uint32_t model) -{ - return family == 0x17 && model <= 0x0f; -} - -static bool is_zen2(uint32_t family, uint32_t model) -{ - return family == 0x17 && model >= 0x30 && model <= 0x3f; -} - -static bool is_zen3(uint32_t family, uint32_t model) -{ - return family == 0x19 && model <= 0x0f; -} - /* - * Determining AMD support for a PMU event requires consulting the AMD - * PPR for the CPU or reference material derived therefrom. The AMD - * test code herein has been verified to work on Zen1, Zen2, and Zen3. - * - * Feel free to add more AMD CPUs that are documented to support event - * select 0xc2 umask 0 as "retired branch instructions." + * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding + * 0xc2,0 for Branch Instructions Retired. 
*/ static bool use_amd_pmu(void) { - uint32_t family = kvm_cpu_family(); - uint32_t model = kvm_cpu_model(); - - return host_cpu_is_amd && - (is_zen1(family, model) || - is_zen2(family, model) || - is_zen3(family, model)); + return host_cpu_is_amd && kvm_cpu_family() >= 0x17; } /* diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 82a8d88b5338..1969f4ab9b28 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; pthread_t threads[KVM_MAX_VCPUS]; struct kvm_vm *vm; - int memfd, i, r; + int memfd, i; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, @@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t * should prevent the VM from being fully destroyed until the last * reference to the guest_memfd is also put. 
*/ - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); - - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); close(memfd); } diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c index 13e72fcec8dd..13e72fcec8dd 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c index cbc92a862ea9..cbc92a862ea9 100644 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c index d691d86e5bc3..49913784bc82 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c @@ -33,6 +33,20 @@ static void guest_not_bsp_vcpu(void *arg) GUEST_DONE(); } +static void test_set_invalid_bsp(struct kvm_vm *vm) +{ + unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + int r; + + if (max_vcpu_id) { + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail"); + } + + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail"); +} + static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) { int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, @@ -80,6 +94,8 @@ static struct 
kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, vm = vm_create(nr_vcpus); + test_set_invalid_bsp(vm); + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index c021c0795a96..f4095a3d1278 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -41,13 +41,15 @@ do { \ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ } while (0) +#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + static uint64_t calc_supported_cr4_feature_bits(void) { - uint64_t cr4; + uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | - X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | - X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; if (kvm_cpu_has(X86_FEATURE_LA57)) @@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -int main(int argc, char *argv[]) +static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) { struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t cr4; int rc, i; - /* - * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and - * use it to verify all supported CR4 bits can be set prior to defining - * the vCPU model, i.e. without doing KVM_SET_CPUID2. 
- */ - vm = vm_create_barebones(); - vcpu = __vm_vcpu_add(vm, 0); - vcpu_sregs_get(vcpu, &sregs); - - sregs.cr0 = 0; - sregs.cr4 |= calc_supported_cr4_feature_bits(); - cr4 = sregs.cr4; - + sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + sregs.cr4 |= cr4; rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)), + "KVM didn't %s OSXSAVE in CPUID as expected", + (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear"); + + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)), + "KVM didn't %s OSPKE in CPUID as expected", + (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear"); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); - /* Verify all unsupported features are rejected by KVM. */ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); @@ -119,10 +116,28 @@ int main(int argc, char *argv[]) /* NW without CD is illegal, as is PG without PE. */ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); +} + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify KVM enforces guest CPUID even if *userspace* never + * sets CPUID. + */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4); kvm_vm_free(vm); - /* Create a "real" VM and verify APIC_BASE can be set. */ + /* Create a "real" VM with a fully populated guest CPUID and verify + * APIC_BASE and all supported CR4 can be set. 
+ */ vm = vm_create_with_one_vcpu(&vcpu, NULL); vcpu_sregs_get(vcpu, &sregs); @@ -135,6 +150,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", sregs.apic_base); + test_cr_bits(vcpu, calc_supported_cr4_feature_bits()); + kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index 3fb967f40c6a..b238615196ad 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -28,6 +28,7 @@ int kvm_fd; u64 supported_vmsa_features; bool have_sev_es; +bool have_snp; static int __sev_ioctl(int vm_fd, int cmd_id, void *data) { @@ -83,6 +84,9 @@ void test_vm_types(void) if (have_sev_es) test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + if (have_snp) + test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){}); + test_init2_invalid(0, &(struct kvm_sev_init){}, "VM type is KVM_X86_DEFAULT_VM"); if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) @@ -138,15 +142,24 @@ int main(int argc, char *argv[]) "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP); + TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)), + "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not", + kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM); + test_vm_types(); test_flags(KVM_X86_SEV_VM); if (have_sev_es) test_flags(KVM_X86_SEV_ES_VM); + if (have_snp) + test_flags(KVM_X86_SNP_VM); test_features(KVM_X86_SEV_VM, 0); if (have_sev_es) test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + if (have_snp) + test_features(KVM_X86_SNP_VM, supported_vmsa_features); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c index 
0a6dfba3905b..0a6dfba3905b 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 7c70c0da4fb7..86ad1c7d068f 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -16,6 +16,18 @@ #define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) +static void guest_snp_code(void) +{ + uint64_t sev_msr = rdmsr(MSR_AMD64_SEV); + + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED); + + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); + vmgexit(); +} + static void guest_sev_es_code(void) { /* TODO: Check CPUID after GHCB-based hypercall support is added. */ @@ -27,7 +39,7 @@ static void guest_sev_es_code(void) * force "termination" to signal "done" via the GHCB MSR protocol. */ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); - __asm__ __volatile__("rep; vmmcall"); + vmgexit(); } static void guest_sev_code(void) @@ -41,8 +53,8 @@ static void guest_sev_code(void) /* Stash state passed via VMSA before any compiled code runs. 
*/ extern void guest_code_xsave(void); asm("guest_code_xsave:\n" - "mov $-1, %eax\n" - "mov $-1, %edx\n" + "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" + "xor %edx, %edx\n" "xsave (%rdi)\n" "jmp guest_sev_es_code"); @@ -52,7 +64,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) bool bad = false; for (i = 0; i < 4095; i++) { if (from_host[i] != from_guest[i]) { - printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + printf("mismatch at %u | %02hhx %02hhx\n", + i, from_host[i], from_guest[i]); bad = true; } } @@ -61,7 +74,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) abort(); } -static void test_sync_vmsa(uint32_t policy) +static void test_sync_vmsa(uint32_t type, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -70,25 +83,14 @@ static void test_sync_vmsa(uint32_t policy) double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - struct kvm_sregs sregs; - struct kvm_xcrs xcrs = { - .nr_xcrs = 1, - .xcrs[0].xcr = 0, - .xcrs[0].value = XFEATURE_MASK_X87_AVX, - }; - - vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); + + vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu); gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, MEM_REGION_TEST_DATA); hva = addr_gva2hva(vm, gva); vcpu_args_set(vcpu, 1, gva); - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - - vcpu_xcrs_set(vcpu, &xcrs); asm("fninit\n" "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" "fldl %3\n" @@ -99,10 +101,10 @@ static void test_sync_vmsa(uint32_t policy) : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); vcpu_xsave_set(vcpu, &xsave); - vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + vm_sev_launch(vm, policy, NULL); /* This page is shared, so make it decrypted. 
*/ - memset(hva, 0, 4096); + memset(hva, 0, PAGE_SIZE); vcpu_run(vcpu); @@ -118,14 +120,12 @@ static void test_sync_vmsa(uint32_t policy) kvm_vm_free(vm); } -static void test_sev(void *guest_code, uint64_t policy) +static void test_sev(void *guest_code, uint32_t type, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); /* TODO: Validate the measurement is as expected. */ @@ -134,7 +134,7 @@ static void test_sev(void *guest_code, uint64_t policy) for (;;) { vcpu_run(vcpu); - if (policy & SEV_POLICY_ES) { + if (is_sev_es_vm(vm)) { TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, "Wanted SYSTEM_EVENT, got %s", exit_reason_str(vcpu->run->exit_reason)); @@ -160,23 +160,70 @@ static void test_sev(void *guest_code, uint64_t policy) kvm_vm_free(vm); } +static void guest_shutdown_code(void) +{ + struct desc_ptr idt; + + /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. 
*/ + memset(&idt, 0, sizeof(idt)); + set_idt(&idt); + + __asm__ __volatile__("ud2"); +} + +static void test_sev_shutdown(uint32_t type, uint64_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); + + vm_sev_launch(vm, policy, NULL); + + vcpu_run(vcpu); + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, + "Wanted SHUTDOWN, got %s", + exit_reason_str(vcpu->run->exit_reason)); + + kvm_vm_free(vm); +} + +static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy) +{ + const u64 xf_mask = XFEATURE_MASK_X87_AVX; + + if (type == KVM_X86_SNP_VM) + test_sev(guest, type, policy | SNP_POLICY_DBG); + else + test_sev(guest, type, policy | SEV_POLICY_NO_DBG); + test_sev(guest, type, policy); + + if (type == KVM_X86_SEV_VM) + return; + + test_sev_shutdown(type, policy); + + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { + test_sync_vmsa(type, policy); + if (type == KVM_X86_SNP_VM) + test_sync_vmsa(type, policy | SNP_POLICY_DBG); + else + test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG); + } +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); - test_sev(guest_sev_code, SEV_POLICY_NO_DBG); - test_sev(guest_sev_code, 0); + test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0); - if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { - test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); - test_sev(guest_sev_es_code, SEV_POLICY_ES); + if (kvm_cpu_has(X86_FEATURE_SEV_ES)) + test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES); - if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { - test_sync_vmsa(0); - test_sync_vmsa(SEV_POLICY_NO_DBG); - } - } + if (kvm_cpu_has(X86_FEATURE_SEV_SNP)) + test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy()); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c 
b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index fabeeaddfb3a..fabeeaddfb3a 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 55c88d664a94..55c88d664a94 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 1c756db329e5..f2c7a1c297e3 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -141,15 +141,10 @@ static void __attribute__((__flatten__)) guest_code(void *arg) if (this_cpu_has(X86_FEATURE_XSAVE)) { uint64_t supported_xcr0 = this_cpu_supported_xcr0(); - uint8_t buffer[4096]; + uint8_t buffer[PAGE_SIZE]; memset(buffer, 0xcc, sizeof(buffer)); - set_cr4(get_cr4() | X86_CR4_OSXSAVE); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xsetbv(0, xgetbv(0) | supported_xcr0); - /* * Modify state for all supported xfeatures to take them out of * their "init" state, i.e. to make them show up in XSTATE_BV. 
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 916e04248fbb..917b6066cfc1 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm) x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); GUEST_ASSERT(vintr_irq_called); GUEST_ASSERT(intr_irq_called); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c index 00135cbba35e..00135cbba35e 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 7b6481d6c0d3..7b6481d6c0d3 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c index 8a62cca28cfb..8a62cca28cfb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c index 8fa3948b0170..8fa3948b0170 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c index 56306a19144a..56306a19144a 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ 
b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c index 12b0964f4f13..12b0964f4f13 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c index 59c7304f805e..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 57f157c06b39..1e5e564523b3 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void) wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); /* Enables interrupt in guest. 
*/ - asm volatile("sti"); + sti(); /* Let user space inject the first UCNA */ GUEST_SYNC(SYNC_FIRST_UCNA); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 9481cbcf284f..be7d72f3c029 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) regs.rcx = 1; if (regs.rcx == 3) regs.rcx = 8192; - memset((void *)run + run->io.data_offset, 0xaa, 4096); + memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE); vcpu_regs_set(vcpu, &regs); } diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..8463a9956410 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); run_guest_then_process_ucall_done(vcpu); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index a81a24761aac..a81a24761aac 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c diff --git 
a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index fa512d033205..98cb6bdab3e6 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -120,17 +120,17 @@ static void test_vmx_dirty_log(bool enable_ept) * GPAs as the EPT enabled case. */ if (enable_ept) { - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); } bmap = bitmap_zalloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); + memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -153,9 +153,9 @@ static void test_vmx_dirty_log(bool enable_ept) } TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); + TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ 
b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c index a100ee5f0009..a100ee5f0009 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index 90720b6205f4..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c new file mode 100644 index 000000000000..cf1d2d1f2a8f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * Test KVM's ability to save and restore nested state when the L1 guest + * is using 5-level paging and the L2 guest is using 4-level paging. + * + * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86: + * model canonical checks more precisely"). + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define LA57_GS_BASE 0xff2bc0311fb00000ull + +static void l2_guest_code(void) +{ + /* + * Sync with L0 to trigger save/restore. After + * resuming, execute VMCALL to exit back to L1. 
+ */ + GUEST_SYNC(1); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u64 guest_cr4; + vm_paddr_t pml5_pa, pml4_pa; + u64 *pml5; + u64 exit_reason; + + /* Set GS_BASE to a value that is only canonical with LA57. */ + wrmsr(MSR_GS_BASE, LA57_GS_BASE); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Set up L2 with a 4-level page table by pointing its CR3 to + * L1's first PML4 table and clearing CR4.LA57. This creates + * the CR4.LA57 mismatch that exercises the bug. + */ + pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK; + pml5 = (u64 *)pml5_pa; + pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK; + vmwrite(GUEST_CR3, pml4_pa); + + guest_cr4 = vmreadz(GUEST_CR4); + guest_cr4 &= ~X86_CR4_LA57; + vmwrite(GUEST_CR4, guest_cr4); + + GUEST_ASSERT(!vmlaunch()); + + exit_reason = vmreadz(VM_EXIT_REASON); + GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + l1_guest_code(vmx_pages); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* + * L1 needs to read its own PML5 table to set up L2. Identity map + * the PML5 table to facilitate this. 
+ */ + virt_map(vm, vm->pgd, vm->pgd, 1); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(uc.args[1] == stage, + "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]); + if (stage == 1) { + pr_info("L2 is active; performing save/restore.\n"); + state = vcpu_save_state(vcpu); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index 7c92536551cc..7ff6f62e20a3 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -29,7 +29,7 @@ static union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; - u64 anythread_deprecated:1; + u64 pebs_timing_info:1; }; u64 capabilities; } host_cap; @@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = { .pebs_arch_reg = 1, .pebs_format = -1, .pebs_baseline = 1, + .pebs_timing_info = 1, }; static const union perf_capabilities format_caps = { @@ -56,8 +57,8 @@ static void guest_test_perf_capabilities_gp(uint64_t val) uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%lx', got vector '0x%x'", - val, vector); + "Expected #GP for value '0x%lx', got %s", + val, ex_str(vector)); } static void guest_code(uint64_t current_val) @@ -207,6 +208,29 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, 
lbr_perf_capabilities, guest_code) TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); } +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); + + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_is_pmu_enabled()); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c index 00dd2ac07a61..00dd2ac07a61 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c index 67a62a5a8895..67a62a5a8895 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index a76078a08ff8..ae4a4b6c05ca 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data) data->halter_tpr = xapic_read_reg(APIC_TASKPRI); data->halter_ppr = xapic_read_reg(APIC_PROCPRI); data->hlt_count++; - asm volatile("sti; hlt; cli"); + 
safe_halt(); + cli(); data->wake_count++; } } @@ -255,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, int nodes = 0; time_t start_time, last_update, now; time_t interval_secs = 1; - int i, r; + int i; int from, to; unsigned long bit; uint64_t hlt_count; @@ -266,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, delay_usecs); /* Get set of first 64 numa nodes available */ - r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 0, MPOL_F_MEMS_ALLOWED); - TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " "(each 1-bit indicates node is present): %#lx\n", @@ -465,6 +465,19 @@ int main(int argc, char *argv[]) cancel_join_vcpu_thread(threads[0], params[0].vcpu); cancel_join_vcpu_thread(threads[1], params[1].vcpu); + /* + * If the host supports Idle HLT, i.e. KVM *might* be using Idle HLT, + * then the number of HLT exits may be less than the number of HLTs + * that were executed, as Idle HLT elides the exit if the vCPU has an + * unmasked, pending IRQ (or NMI). 
+ */ + if (this_cpu_has(X86_FEATURE_IDLE_HLT)) + TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), + "HLT insns = %lu, HLT exits = %lu", + data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + else + TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + fprintf(stderr, "Test successful after running for %d seconds.\n" "Sending vCPU sent %lu IPIs to halting vCPU\n" diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 69849acd95b0..3b4814c55722 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -13,11 +13,12 @@ struct xapic_vcpu { struct kvm_vcpu *vcpu; bool is_x2apic; + bool has_xavic_errata; }; static void xapic_guest_code(void) { - asm volatile("cli"); + cli(); xapic_enable(); @@ -31,9 +32,13 @@ static void xapic_guest_code(void) } } +#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ + GENMASK_ULL(17, 16) | \ + GENMASK_ULL(13, 13)) + static void x2apic_guest_code(void) { - asm volatile("cli"); + cli(); x2apic_enable(); @@ -41,7 +46,12 @@ static void x2apic_guest_code(void) uint64_t val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; - x2apic_write_reg(APIC_ICR, val); + if (val & X2APIC_RSVD_BITS_MASK) { + x2apic_write_reg_fault(APIC_ICR, val); + } else { + x2apic_write_reg(APIC_ICR, val); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); + } GUEST_SYNC(val); } while (1); } @@ -71,27 +81,28 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) { - val &= (-1u | (0xffull << (32 + 24))); - TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); - } else { - TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + if (!x->has_xavic_errata) + val &= (-1u | (0xffull << (32 + 24))); + } else if (val & X2APIC_RSVD_BITS_MASK) { + 
return; } -} -#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ - GENMASK_ULL(17,16) | \ - GENMASK_ULL(13,13)) + if (x->has_xavic_errata) + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + else + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); +} static void __test_icr(struct xapic_vcpu *x, uint64_t val) { - if (x->is_x2apic) { - /* Hardware writing vICR register requires reserved bits 31:20, - * 17:16 and 13 kept as zero to avoid #GP exception. Data value - * written to vICR should mask out those bits above. - */ - val &= ~X2APIC_RSVED_BITS_MASK; - } - ____test_icr(x, val | APIC_ICR_BUSY); + /* + * The BUSY bit is reserved on both AMD and Intel, but only AMD treats + * it as _must_ be zero. Intel simply ignores the bit. Don't test + * the BUSY bit for x2APIC, as there is no single correct behavior. + */ + if (!x->is_x2apic) + ____test_icr(x, val | APIC_ICR_BUSY); + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); } @@ -109,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, icr | i); /* - * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of - * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use + * vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; for (i = 0; i < 0xff; i++) { @@ -184,6 +195,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. 
+ */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -204,6 +242,15 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); x.is_x2apic = false; + /* + * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), + * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel + * drop writes, AMD does not). Account for the errata when checking + * that KVM reads back what was written. + */ + x.has_xavic_errata = host_cpu_is_amd && + get_kvm_amd_param_bool("avic"); + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); @@ -211,4 +258,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index 95ce192d0753..d038c1571729 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -48,16 +48,16 @@ do { \ static void guest_code(void) { - uint64_t xcr0_reset; + uint64_t initial_xcr0; uint64_t supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); - xcr0_reset = xgetbv(0); + initial_xcr0 = xgetbv(0); supported_xcr0 = this_cpu_supported_xcr0(); - GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP); + GUEST_ASSERT(initial_xcr0 == supported_xcr0); /* Check AVX */ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, @@ -79,10 +79,15 @@ static void guest_code(void) ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, XFEATURE_MASK_XTILE); + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + 
__GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got %s", + ex_str(vector)); + vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%lx), got vector '0x%x'", - supported_xcr0, vector); + "Expected success on XSETBV(0x%lx), got %s", + supported_xcr0, ex_str(vector)); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) @@ -90,8 +95,8 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", - BIT_ULL(i), supported_xcr0, vector); + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s", + BIT_ULL(i), supported_xcr0, ex_str(vector)); } GUEST_DONE(); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index a59b3c799bb2..23909b501ac2 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -191,10 +191,7 @@ static void guest_code(void) struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; int i; - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); /* Trigger an interrupt injection */ GUEST_SYNC(TEST_INJECT_VECTOR); @@ -550,15 +547,9 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; @@ -575,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, 
KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c index e149d0574961..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c @@ -10,6 +10,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "hyperv.h" #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c index f331a4e9bae3..f331a4e9bae3 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c deleted file mode 100644 index 624dc725e14d..000000000000 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ /dev/null @@ -1,85 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * CR4 and CPUID sync test - * - * Copyright 2018, Red Hat, Inc. and/or its affiliates. 
- * - * Author: - * Wei Huang <wei@redhat.com> - */ - -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -static inline bool cr4_cpuid_is_sync(void) -{ - uint64_t cr4 = get_cr4(); - - return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE)); -} - -static void guest_code(void) -{ - uint64_t cr4; - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - - /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - /* notify hypervisor to change CR4 */ - GUEST_SYNC(0); - - /* check again */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_sregs sregs; - struct ucall uc; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - while (1) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c deleted file mode 100644 index d09b3cbcadc6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_GET_MSR_INDEX_LIST and - * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended - * - * Copyright (C) 2020, Red Hat, Inc. 
- */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - kvm_get_feature_msr(feature_list->indices[i]); -} |
