diff options
Diffstat (limited to 'tools/testing/selftests/kvm')
241 files changed, 26816 insertions, 5948 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6d9381d60172..1d41a046a7bf 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,3 +5,8 @@ !*.h !*.S !*.sh +!.gitignore +!config +!settings +!Makefile +!Makefile.kvm
\ No newline at end of file diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c692cc86e7da..f2b223072b62 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,269 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only -include ../../../build/Build.include - -all: - top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),x86) - ARCH_DIR := x86_64 -else ifeq ($(ARCH),arm64) - ARCH_DIR := aarch64 -else ifeq ($(ARCH),s390) - ARCH_DIR := s390x -else - ARCH_DIR := $(ARCH) -endif - -LIBKVM += lib/assert.c -LIBKVM += lib/elf.c -LIBKVM += lib/guest_modes.c -LIBKVM += lib/io.c -LIBKVM += lib/kvm_util.c -LIBKVM += lib/memstress.c -LIBKVM += lib/rbtree.c -LIBKVM += lib/sparsebit.c -LIBKVM += lib/test_util.c -LIBKVM += lib/ucall_common.c -LIBKVM += lib/userfaultfd_util.c - -LIBKVM_STRING += lib/string_override.c - -LIBKVM_x86_64 += lib/x86_64/apic.c -LIBKVM_x86_64 += lib/x86_64/handlers.S -LIBKVM_x86_64 += lib/x86_64/hyperv.c -LIBKVM_x86_64 += lib/x86_64/memstress.c -LIBKVM_x86_64 += lib/x86_64/processor.c -LIBKVM_x86_64 += lib/x86_64/svm.c -LIBKVM_x86_64 += lib/x86_64/ucall.c -LIBKVM_x86_64 += lib/x86_64/vmx.c - -LIBKVM_aarch64 += lib/aarch64/gic.c -LIBKVM_aarch64 += lib/aarch64/gic_v3.c -LIBKVM_aarch64 += lib/aarch64/handlers.S -LIBKVM_aarch64 += lib/aarch64/processor.c -LIBKVM_aarch64 += lib/aarch64/spinlock.c -LIBKVM_aarch64 += lib/aarch64/ucall.c -LIBKVM_aarch64 += lib/aarch64/vgic.c - -LIBKVM_s390x += lib/s390x/diag318_test_handler.c -LIBKVM_s390x += lib/s390x/processor.c -LIBKVM_s390x += lib/s390x/ucall.c - -LIBKVM_riscv += lib/riscv/processor.c -LIBKVM_riscv += lib/riscv/ucall.c - -# Non-compiled test targets -TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh - -# Compiled test targets -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features -TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test -TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test -TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush -TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test -TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test -TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test -TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test -TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test -TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test -TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id -TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test -TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test -TEST_GEN_PROGS_x86_64 += x86_64/smm_test -TEST_GEN_PROGS_x86_64 += x86_64/state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test -TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test -TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync -TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test -TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test -TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test -TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test -TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test -TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test -TEST_GEN_PROGS_x86_64 += x86_64/debug_regs -TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test -TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test -TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test -TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests -TEST_GEN_PROGS_x86_64 += x86_64/amx_test -TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test -TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test -TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test -TEST_GEN_PROGS_x86_64 += access_tracking_perf_test -TEST_GEN_PROGS_x86_64 += demand_paging_test -TEST_GEN_PROGS_x86_64 += dirty_log_test -TEST_GEN_PROGS_x86_64 += dirty_log_perf_test -TEST_GEN_PROGS_x86_64 += hardware_disable_test -TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86_64 += kvm_page_table_test -TEST_GEN_PROGS_x86_64 += max_guest_memory_test -TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test -TEST_GEN_PROGS_x86_64 += memslot_perf_test -TEST_GEN_PROGS_x86_64 += rseq_test -TEST_GEN_PROGS_x86_64 += set_memory_region_test -TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test -TEST_GEN_PROGS_x86_64 += system_counter_offset_test - -# Compiled outputs used by test targets -TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test - -TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs -TEST_GEN_PROGS_aarch64 += aarch64/arch_timer -TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list -TEST_GEN_PROGS_aarch64 += aarch64/hypercalls -TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test -TEST_GEN_PROGS_aarch64 += aarch64/psci_test -TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter -TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config -TEST_GEN_PROGS_aarch64 += aarch64/vgic_init -TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq -TEST_GEN_PROGS_aarch64 += access_tracking_perf_test -TEST_GEN_PROGS_aarch64 += demand_paging_test -TEST_GEN_PROGS_aarch64 += dirty_log_test -TEST_GEN_PROGS_aarch64 += dirty_log_perf_test -TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus -TEST_GEN_PROGS_aarch64 += kvm_page_table_test -TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test -TEST_GEN_PROGS_aarch64 += memslot_perf_test -TEST_GEN_PROGS_aarch64 += rseq_test -TEST_GEN_PROGS_aarch64 += set_memory_region_test -TEST_GEN_PROGS_aarch64 += steal_time -TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test - -TEST_GEN_PROGS_s390x = s390x/memop -TEST_GEN_PROGS_s390x += s390x/resets -TEST_GEN_PROGS_s390x += s390x/sync_regs_test -TEST_GEN_PROGS_s390x += s390x/tprot -TEST_GEN_PROGS_s390x += s390x/cmma_test -TEST_GEN_PROGS_s390x += demand_paging_test -TEST_GEN_PROGS_s390x += dirty_log_test -TEST_GEN_PROGS_s390x += kvm_create_max_vcpus -TEST_GEN_PROGS_s390x += kvm_page_table_test -TEST_GEN_PROGS_s390x += rseq_test -TEST_GEN_PROGS_s390x += set_memory_region_test -TEST_GEN_PROGS_s390x += kvm_binary_stats_test - -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test - -TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) -LIBKVM += $(LIBKVM_$(ARCH_DIR)) - -OVERRIDE_TARGETS = 1 - -# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most -# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, -# which causes the environment variable to override the makefile). -include ../lib.mk - -INSTALL_HDR_PATH = $(top_srcdir)/usr -LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch)) +# Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include -else -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include + override ARCH := x86 endif -CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD\ - -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ - -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ - -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ - $(KHDR_INCLUDES) -ifeq ($(ARCH),s390) - CFLAGS += -march=z10 +include Makefile.kvm +else +# Empty targets for unsupported architectures +all: +clean: endif - -no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -# On s390, build the testcases KVM-enabled -pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) - -LDLIBS += -ldl -LDFLAGS += -pthread $(no-pie-option) $(pgste-option) - -LIBKVM_C := $(filter %.c,$(LIBKVM)) -LIBKVM_S := $(filter %.S,$(LIBKVM)) -LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) -LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) - -TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) -TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) -TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) -TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) --include $(TEST_DEP_FILES) - -$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o - $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ -$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.* - -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) -$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ - -# Compile the string overrides as freestanding to prevent the compiler from -# generating self-referential code, e.g. without "freestanding" the compiler may -# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. -$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c - $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ - -x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) -$(TEST_GEN_PROGS): $(LIBKVM_OBJS) -$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) - -cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. -cscope: - $(RM) cscope.* - (find $(include_paths) -name '*.h' \ - -exec realpath --relative-base=$(PWD) {} \;; \ - find . -name '*.c' \ - -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files - cscope -b diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm new file mode 100644 index 000000000000..ba5c2b643efa --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -0,0 +1,356 @@ +# SPDX-License-Identifier: GPL-2.0-only +include ../../../build/Build.include + +all: + +LIBKVM += lib/assert.c +LIBKVM += lib/elf.c +LIBKVM += lib/guest_modes.c +LIBKVM += lib/io.c +LIBKVM += lib/kvm_util.c +LIBKVM += lib/lru_gen_util.c +LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c +LIBKVM += lib/rbtree.c +LIBKVM += lib/sparsebit.c +LIBKVM += lib/test_util.c +LIBKVM += lib/ucall_common.c +LIBKVM += lib/userfaultfd_util.c + +LIBKVM_STRING += lib/string_override.c + +LIBKVM_x86 += lib/x86/apic.c +LIBKVM_x86 += lib/x86/handlers.S +LIBKVM_x86 += lib/x86/hyperv.c +LIBKVM_x86 += lib/x86/memstress.c +LIBKVM_x86 += lib/x86/pmu.c +LIBKVM_x86 += lib/x86/processor.c +LIBKVM_x86 += lib/x86/sev.c +LIBKVM_x86 += lib/x86/svm.c +LIBKVM_x86 += lib/x86/ucall.c +LIBKVM_x86 += lib/x86/vmx.c + +LIBKVM_arm64 += lib/arm64/gic.c +LIBKVM_arm64 += lib/arm64/gic_v3.c +LIBKVM_arm64 += lib/arm64/gic_v3_its.c +LIBKVM_arm64 += lib/arm64/handlers.S +LIBKVM_arm64 += lib/arm64/processor.c +LIBKVM_arm64 += lib/arm64/spinlock.c +LIBKVM_arm64 += lib/arm64/ucall.c +LIBKVM_arm64 += lib/arm64/vgic.c + +LIBKVM_s390 += lib/s390/diag318_test_handler.c +LIBKVM_s390 += lib/s390/processor.c +LIBKVM_s390 += lib/s390/ucall.c +LIBKVM_s390 += lib/s390/facility.c + +LIBKVM_riscv += lib/riscv/handlers.S +LIBKVM_riscv += lib/riscv/processor.c +LIBKVM_riscv += lib/riscv/ucall.c + +LIBKVM_loongarch += lib/loongarch/processor.c +LIBKVM_loongarch += lib/loongarch/ucall.c +LIBKVM_loongarch += lib/loongarch/exception.S + +# Non-compiled test targets +TEST_PROGS_x86 += x86/nx_huge_pages_test.sh + +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + +# Compiled test targets +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test +TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test +TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fastops_test +TEST_GEN_PROGS_x86 += x86/fix_hypercall_test +TEST_GEN_PROGS_x86 += x86/hwcr_msr_test +TEST_GEN_PROGS_x86 += x86/hyperv_clock +TEST_GEN_PROGS_x86 += x86/hyperv_cpuid +TEST_GEN_PROGS_x86 += x86/hyperv_evmcs +TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls +TEST_GEN_PROGS_x86 += x86/hyperv_features +TEST_GEN_PROGS_x86 += x86/hyperv_ipi +TEST_GEN_PROGS_x86 += x86/hyperv_svm_test +TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush +TEST_GEN_PROGS_x86 += x86/kvm_clock_test +TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/kvm_buslock_test +TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/msrs_test +TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test +TEST_GEN_PROGS_x86 += x86/nested_emulation_test +TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/platform_info_test +TEST_GEN_PROGS_x86 += x86/pmu_counters_test +TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test +TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test +TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test +TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id +TEST_GEN_PROGS_x86 += x86/set_sregs_test +TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test +TEST_GEN_PROGS_x86 += x86/smm_test +TEST_GEN_PROGS_x86 += x86/state_test +TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test +TEST_GEN_PROGS_x86 += x86/svm_vmcall_test +TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test +TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test +TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync +TEST_GEN_PROGS_x86 += x86/sync_regs_test +TEST_GEN_PROGS_x86 += x86/ucna_injection_test +TEST_GEN_PROGS_x86 += x86/userspace_io_test +TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test +TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test +TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state +TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test +TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test +TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test +TEST_GEN_PROGS_x86 += x86/xapic_ipi_test +TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test +TEST_GEN_PROGS_x86 += x86/xss_msr_test +TEST_GEN_PROGS_x86 += x86/debug_regs +TEST_GEN_PROGS_x86 += x86/tsc_msrs_test +TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test +TEST_GEN_PROGS_x86 += x86/xen_shinfo_test +TEST_GEN_PROGS_x86 += x86/xen_vmcall_test +TEST_GEN_PROGS_x86 += x86/sev_init2_tests +TEST_GEN_PROGS_x86 += x86/sev_migrate_tests +TEST_GEN_PROGS_x86 += x86/sev_smoke_test +TEST_GEN_PROGS_x86 += x86/amx_test +TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test +TEST_GEN_PROGS_x86 += x86/triple_fault_event_test +TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += x86/aperfmperf_test +TEST_GEN_PROGS_x86 += access_tracking_perf_test +TEST_GEN_PROGS_x86 += coalesced_io_test +TEST_GEN_PROGS_x86 += dirty_log_perf_test +TEST_GEN_PROGS_x86 += guest_memfd_test +TEST_GEN_PROGS_x86 += hardware_disable_test +TEST_GEN_PROGS_x86 += memslot_modification_stress_test +TEST_GEN_PROGS_x86 += memslot_perf_test +TEST_GEN_PROGS_x86 += mmu_stress_test +TEST_GEN_PROGS_x86 += rseq_test +TEST_GEN_PROGS_x86 += steal_time +TEST_GEN_PROGS_x86 += system_counter_offset_test +TEST_GEN_PROGS_x86 += pre_fault_memory_test + +# Compiled outputs used by test targets +TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test + +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs +TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/at +TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hello_el2 +TEST_GEN_PROGS_arm64 += arm64/host_sve +TEST_GEN_PROGS_arm64 += arm64/hypercalls +TEST_GEN_PROGS_arm64 += arm64/external_aborts +TEST_GEN_PROGS_arm64 += arm64/page_fault_test +TEST_GEN_PROGS_arm64 += arm64/psci_test +TEST_GEN_PROGS_arm64 += arm64/sea_to_user +TEST_GEN_PROGS_arm64 += arm64/set_id_regs +TEST_GEN_PROGS_arm64 += arm64/smccc_filter +TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config +TEST_GEN_PROGS_arm64 += arm64/vgic_init +TEST_GEN_PROGS_arm64 += arm64/vgic_irq +TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress +TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access +TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid +TEST_GEN_PROGS_arm64 += access_tracking_perf_test +TEST_GEN_PROGS_arm64 += arch_timer +TEST_GEN_PROGS_arm64 += coalesced_io_test +TEST_GEN_PROGS_arm64 += dirty_log_perf_test +TEST_GEN_PROGS_arm64 += get-reg-list +TEST_GEN_PROGS_arm64 += guest_memfd_test +TEST_GEN_PROGS_arm64 += memslot_modification_stress_test +TEST_GEN_PROGS_arm64 += memslot_perf_test +TEST_GEN_PROGS_arm64 += mmu_stress_test +TEST_GEN_PROGS_arm64 += rseq_test +TEST_GEN_PROGS_arm64 += steal_time + +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop +TEST_GEN_PROGS_s390 += s390/resets +TEST_GEN_PROGS_s390 += s390/sync_regs_test +TEST_GEN_PROGS_s390 += s390/tprot +TEST_GEN_PROGS_s390 += s390/cmma_test +TEST_GEN_PROGS_s390 += s390/debug_test +TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test +TEST_GEN_PROGS_s390 += s390/shared_zeropage_test +TEST_GEN_PROGS_s390 += s390/ucontrol_test +TEST_GEN_PROGS_s390 += s390/user_operexec +TEST_GEN_PROGS_s390 += rseq_test + +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += access_tracking_perf_test +TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += dirty_log_perf_test +TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += memslot_modification_stress_test +TEST_GEN_PROGS_riscv += memslot_perf_test +TEST_GEN_PROGS_riscv += mmu_stress_test +TEST_GEN_PROGS_riscv += rseq_test +TEST_GEN_PROGS_riscv += steal_time + +TEST_GEN_PROGS_loongarch = arch_timer +TEST_GEN_PROGS_loongarch += coalesced_io_test +TEST_GEN_PROGS_loongarch += demand_paging_test +TEST_GEN_PROGS_loongarch += dirty_log_perf_test +TEST_GEN_PROGS_loongarch += dirty_log_test +TEST_GEN_PROGS_loongarch += guest_print_test +TEST_GEN_PROGS_loongarch += hardware_disable_test +TEST_GEN_PROGS_loongarch += kvm_binary_stats_test +TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus +TEST_GEN_PROGS_loongarch += kvm_page_table_test +TEST_GEN_PROGS_loongarch += memslot_modification_stress_test +TEST_GEN_PROGS_loongarch += memslot_perf_test +TEST_GEN_PROGS_loongarch += set_memory_region_test + +SPLIT_TESTS += arch_timer +SPLIT_TESTS += get-reg-list + +TEST_PROGS += $(TEST_PROGS_$(ARCH)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH)) +LIBKVM += $(LIBKVM_$(ARCH)) + +OVERRIDE_TARGETS = 1 + +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). +include ../lib.mk +include ../cgroup/lib/libcgroup.mk + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include +CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \ + -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) +ifeq ($(ARCH),s390) + CFLAGS += -march=z10 +endif +ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif + +no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + +LDLIBS += -ldl +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) + +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O) +SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS)) + +TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ)) +-include $(TEST_DEP_FILES) + +$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) + +$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \ +$(TEST_GEN_PROGS_EXTENDED): %: %.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ +$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ +$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(GEN_HDRS) \ + $(LIBKVM_OBJS) \ + $(SPLIT_TEST_GEN_OBJ) \ + $(TEST_DEP_FILES) \ + $(TEST_GEN_OBJ) \ + cscope.* + +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +# Compile the string overrides as freestanding to prevent the compiler from +# generating self-referential code, e.g. without "freestanding" the compiler may +# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion. +$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ + +$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) +$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS) +$(TEST_GEN_PROGS): $(LIBKVM_OBJS) +$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) +$(TEST_GEN_OBJ): $(GEN_HDRS) + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. +cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . -name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c deleted file mode 100644 index 8ef370924a02..000000000000 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ /dev/null @@ -1,484 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch_timer.c - Tests the aarch64 timer IRQ functionality - * - * The test validates both the virtual and physical timer IRQs using - * CVAL and TVAL registers. This consitutes the four stages in the test. - * The guest's main thread configures the timer interrupt for a stage - * and waits for it to fire, with a timeout equal to the timer period. - * It asserts that the timeout doesn't exceed the timer period. - * - * On the other hand, upon receipt of an interrupt, the guest's interrupt - * handler validates the interrupt by checking if the architectural state - * is in compliance with the specifications. - * - * The test provides command-line options to configure the timer's - * period (-p), number of vCPUs (-n), and iterations per stage (-i). - * To stress-test the timer stack even more, an option to migrate the - * vCPUs across pCPUs (-m), at a particular rate, is also provided. - * - * Copyright (c) 2021, Google LLC. - */ - -#define _GNU_SOURCE - -#include <stdlib.h> -#include <pthread.h> -#include <linux/kvm.h> -#include <linux/sizes.h> -#include <linux/bitmap.h> -#include <sys/sysinfo.h> - -#include "kvm_util.h" -#include "processor.h" -#include "delay.h" -#include "arch_timer.h" -#include "gic.h" -#include "vgic.h" - -#define NR_VCPUS_DEF 4 -#define NR_TEST_ITERS_DEF 5 -#define TIMER_TEST_PERIOD_MS_DEF 10 -#define TIMER_TEST_ERR_MARGIN_US 100 -#define TIMER_TEST_MIGRATION_FREQ_MS 2 - -struct test_args { - int nr_vcpus; - int nr_iter; - int timer_period_ms; - int migration_freq_ms; - struct kvm_arm_counter_offset offset; -}; - -static struct test_args test_args = { - .nr_vcpus = NR_VCPUS_DEF, - .nr_iter = NR_TEST_ITERS_DEF, - .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF, - .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS, - .offset = { .reserved = 1 }, -}; - -#define msecs_to_usecs(msec) ((msec) * 1000LL) - -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - -enum guest_stage { - GUEST_STAGE_VTIMER_CVAL = 1, - GUEST_STAGE_VTIMER_TVAL, - GUEST_STAGE_PTIMER_CVAL, - GUEST_STAGE_PTIMER_TVAL, - GUEST_STAGE_MAX, -}; - -/* Shared variables between host and guest */ -struct test_vcpu_shared_data { - int nr_iter; - enum guest_stage guest_stage; - uint64_t xcnt; -}; - -static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; -static pthread_t pt_vcpu_run[KVM_MAX_VCPUS]; -static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS]; - -static int vtimer_irq, ptimer_irq; - -static unsigned long *vcpu_done_map; -static pthread_mutex_t vcpu_done_map_lock; - -static void -guest_configure_timer_action(struct test_vcpu_shared_data *shared_data) -{ - switch (shared_data->guest_stage) { - case GUEST_STAGE_VTIMER_CVAL: - timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(VIRTUAL); - timer_set_ctl(VIRTUAL, CTL_ENABLE); - break; - case GUEST_STAGE_VTIMER_TVAL: - timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(VIRTUAL); - timer_set_ctl(VIRTUAL, CTL_ENABLE); - break; - case GUEST_STAGE_PTIMER_CVAL: - timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(PHYSICAL); - timer_set_ctl(PHYSICAL, CTL_ENABLE); - break; - case GUEST_STAGE_PTIMER_TVAL: - timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms); - shared_data->xcnt = timer_get_cntct(PHYSICAL); - timer_set_ctl(PHYSICAL, CTL_ENABLE); - break; - default: - GUEST_ASSERT(0); - } -} - -static void guest_validate_irq(unsigned int intid, - struct test_vcpu_shared_data *shared_data) -{ - enum guest_stage stage = shared_data->guest_stage; - uint64_t xcnt = 0, xcnt_diff_us, cval = 0; - unsigned long xctl = 0; - unsigned int timer_irq = 0; - unsigned int accessor; - - if (intid == IAR_SPURIOUS) - return; - - switch (stage) { - case GUEST_STAGE_VTIMER_CVAL: - case GUEST_STAGE_VTIMER_TVAL: - accessor = VIRTUAL; - timer_irq = vtimer_irq; - break; - case GUEST_STAGE_PTIMER_CVAL: - case GUEST_STAGE_PTIMER_TVAL: - accessor = PHYSICAL; - timer_irq = ptimer_irq; - break; - default: - GUEST_ASSERT(0); - return; - } - - xctl = timer_get_ctl(accessor); - if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE)) - return; - - timer_set_ctl(accessor, CTL_IMASK); - xcnt = timer_get_cntct(accessor); - cval = timer_get_cval(accessor); - - xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); - - /* Make sure we are dealing with the correct timer IRQ */ - GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq); - - /* Basic 'timer condition met' check */ - GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us); - GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl); - - WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); -} - -static void guest_irq_handler(struct ex_regs *regs) -{ - unsigned int intid = gic_get_and_ack_irq(); - uint32_t cpu = guest_get_vcpuid(); - struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; - - guest_validate_irq(intid, shared_data); - - gic_set_eoi(intid); -} - -static void guest_run_stage(struct test_vcpu_shared_data *shared_data, - enum guest_stage stage) -{ - uint32_t irq_iter, config_iter; - - shared_data->guest_stage = stage; - shared_data->nr_iter = 0; - - for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { - /* Setup the next interrupt */ - guest_configure_timer_action(shared_data); - - /* Setup a timeout for the interrupt to arrive */ - udelay(msecs_to_usecs(test_args.timer_period_ms) + - TIMER_TEST_ERR_MARGIN_US); - - irq_iter = READ_ONCE(shared_data->nr_iter); - GUEST_ASSERT_2(config_iter + 1 == irq_iter, - config_iter + 1, irq_iter); - } -} - -static void guest_code(void) -{ - uint32_t cpu = guest_get_vcpuid(); - struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; - - local_irq_disable(); - - gic_init(GIC_V3, test_args.nr_vcpus, - (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA); - - timer_set_ctl(VIRTUAL, CTL_IMASK); - timer_set_ctl(PHYSICAL, CTL_IMASK); - - gic_irq_enable(vtimer_irq); - gic_irq_enable(ptimer_irq); - local_irq_enable(); - - guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL); - guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL); - guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL); - guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL); - - GUEST_DONE(); -} - -static void *test_vcpu_run(void *arg) -{ - unsigned int vcpu_idx = (unsigned long)arg; - struct ucall uc; - struct kvm_vcpu *vcpu = vcpus[vcpu_idx]; - struct kvm_vm *vm = vcpu->vm; - struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx]; - - vcpu_run(vcpu); - - /* Currently, any exit from guest is an indication of completion */ - pthread_mutex_lock(&vcpu_done_map_lock); - __set_bit(vcpu_idx, vcpu_done_map); - pthread_mutex_unlock(&vcpu_done_map_lock); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - case UCALL_DONE: - break; - case UCALL_ABORT: - sync_global_from_guest(vm, *shared_data); - REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u", - GUEST_ASSERT_ARG(uc, 0), - GUEST_ASSERT_ARG(uc, 1), - GUEST_ASSERT_ARG(uc, 2), - vcpu_idx, - shared_data->guest_stage, - shared_data->nr_iter); - break; - default: - TEST_FAIL("Unexpected guest exit\n"); - } - - return NULL; -} - -static uint32_t test_get_pcpu(void) -{ - uint32_t pcpu; - unsigned int nproc_conf; - cpu_set_t online_cpuset; - - nproc_conf = get_nprocs_conf(); - sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset); - - /* Randomly find an available pCPU to place a vCPU on */ - do { - pcpu = rand() % nproc_conf; - } while (!CPU_ISSET(pcpu, &online_cpuset)); - - return pcpu; -} - -static int test_migrate_vcpu(unsigned int vcpu_idx) -{ - int ret; - cpu_set_t cpuset; - uint32_t new_pcpu = test_get_pcpu(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); - - /* Allow the error where the vCPU thread is already finished */ - TEST_ASSERT(ret == 0 || ret == ESRCH, - "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n", - vcpu_idx, new_pcpu, ret); - - return ret; -} - -static void *test_vcpu_migration(void *arg) -{ - unsigned int i, n_done; - bool vcpu_done; - - do { - usleep(msecs_to_usecs(test_args.migration_freq_ms)); - - for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) { - pthread_mutex_lock(&vcpu_done_map_lock); - vcpu_done = test_bit(i, vcpu_done_map); - pthread_mutex_unlock(&vcpu_done_map_lock); - - if (vcpu_done) { - n_done++; - continue; - } - - test_migrate_vcpu(i); - } - } while (test_args.nr_vcpus != n_done); - - return NULL; -} - -static void test_run(struct kvm_vm *vm) -{ - pthread_t pt_vcpu_migration; - unsigned int i; - int ret; - - pthread_mutex_init(&vcpu_done_map_lock, NULL); - vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus); - TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n"); - - for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) { - ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run, - (void *)(unsigned long)i); - TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i); - } - - /* Spawn a thread to control the vCPU migrations */ - if (test_args.migration_freq_ms) { - srand(time(NULL)); - - ret = pthread_create(&pt_vcpu_migration, NULL, - test_vcpu_migration, NULL); - TEST_ASSERT(!ret, "Failed to create the migration pthread\n"); - } - - - for (i = 0; i < test_args.nr_vcpus; i++) - pthread_join(pt_vcpu_run[i], NULL); - - if (test_args.migration_freq_ms) - pthread_join(pt_vcpu_migration, NULL); - - bitmap_free(vcpu_done_map); -} - -static void test_init_timer_irq(struct kvm_vm *vm) -{ - /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); - - sync_global_to_guest(vm, ptimer_irq); - sync_global_to_guest(vm, vtimer_irq); - - pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); -} - -static int gic_fd; - -static struct kvm_vm *test_vm_create(void) -{ - struct kvm_vm *vm; - unsigned int i; - int nr_vcpus = test_args.nr_vcpus; - - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - - vm_init_descriptor_tables(vm); - vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); - - if (!test_args.offset.reserved) { - if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) - vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset); - else - TEST_FAIL("no support for global offset\n"); - } - - for (i = 0; i < nr_vcpus; i++) - vcpu_init_descriptor_tables(vcpus[i]); - - test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); - - /* Make all the test's cmdline args visible to the guest */ - sync_global_to_guest(vm, test_args); - - return vm; -} - -static void test_vm_cleanup(struct kvm_vm *vm) -{ - close(gic_fd); - kvm_vm_free(vm); -} - -static void test_print_help(char *name) -{ - pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n", - name); - pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n", - NR_VCPUS_DEF, KVM_MAX_VCPUS); - pr_info("\t-i: Number of iterations per stage (default: %u)\n", - NR_TEST_ITERS_DEF); - pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n", - TIMER_TEST_PERIOD_MS_DEF); - pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n", - TIMER_TEST_MIGRATION_FREQ_MS); - pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n"); - pr_info("\t-h: print this help screen\n"); -} - -static bool parse_args(int argc, char *argv[]) -{ - int opt; - - while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) { - switch (opt) { - case 'n': - test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); - if (test_args.nr_vcpus > KVM_MAX_VCPUS) { - pr_info("Max allowed vCPUs: %u\n", - KVM_MAX_VCPUS); - goto err; - } - break; - case 'i': - test_args.nr_iter = atoi_positive("Number of iterations", optarg); - break; - case 'p': - test_args.timer_period_ms = atoi_positive("Periodicity", optarg); - break; - case 'm': - test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); - break; - case 'o': - test_args.offset.counter_offset = strtol(optarg, NULL, 0); - test_args.offset.reserved = 0; - break; - case 'h': - default: - goto err; - } - } - - return true; - -err: - test_print_help(argv[0]); - return false; -} - -int main(int argc, char *argv[]) -{ - struct kvm_vm *vm; - - if (!parse_args(argc, argv)) - exit(KSFT_SKIP); - - __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2, - "At least two physical CPUs needed for vCPU migration"); - - vm = test_vm_create(); - test_run(vm); - test_vm_cleanup(vm); - - return 0; -} diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..b058f27b2141 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -7,9 +7,11 @@ * This test measures the performance effects of KVM's access tracking. * Access tracking is driven by the MMU notifiers test_young, clear_young, and * clear_flush_young. These notifiers do not have a direct userspace API, - * however the clear_young notifier can be triggered by marking a pages as idle - * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to - * enable access tracking on guest memory. + * however the clear_young notifier can be triggered either by + * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR + * 2. adding a new MGLRU generation using the lru_gen debugfs file. + * This test leverages page_idle to enable access tracking on guest memory + * unless MGLRU is enabled, in which case MGLRU is used. * * To measure performance this test runs a VM with a configurable number of * vCPUs that each touch every page in disjoint regions of memory. Performance @@ -17,10 +19,11 @@ * predefined region. * * Note that a deterministic correctness test of access tracking is not possible - * by using page_idle as it exists today. This is for a few reasons: + * by using page_idle or MGLRU aging as it exists today. This is for a few + * reasons: * - * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This - * means subsequent guest accesses are not guaranteed to see page table + * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush. + * This means subsequent guest accesses are not guaranteed to see page table * updates made by KVM until some time in the future. * * 2. page_idle only operates on LRU pages. Newly allocated pages are not @@ -47,10 +50,19 @@ #include "memstress.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +#include "cgroup_util.h" +#include "lru_gen_util.h" + +static const char *TEST_MEMCG_NAME = "access_tracking_perf_test"; /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; +/* The cgroup memory controller root. Needed for lru_gen-based aging. */ +char cgroup_root[PATH_MAX]; + /* Defines what vCPU threads should do during a given iteration. */ static enum { /* Run the vCPU to access all its memory. */ @@ -65,6 +77,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access; +/* + * If the test should only warn if there are too many idle pages (i.e., it is + * expected). + * -1: Not yet set. + * 0: We do not expect too many idle pages, so FAIL if too many idle pages. + * 1: Having too many idle pages is expected, so merely print a warning if + * too many idle pages are found. + */ +static int idle_pages_warn_only = -1; + +/* Whether or not to use MGLRU instead of page_idle for access tracking */ +static bool use_lru_gen; + +/* Total number of pages to expect in the memcg after touching everything */ +static long test_pages; + +/* Last generation we found the pages in */ +static int lru_gen_last_gen = -1; + struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; @@ -123,8 +154,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) "Set page_idle bits for PFN 0x%" PRIx64, pfn); } -static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct memstress_vcpu_args *vcpu_args) +static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx) +{ + char prefix[18] = {}; + + if (vcpu_idx >= 0) + snprintf(prefix, 18, "vCPU%d: ", vcpu_idx); + + TEST_ASSERT(idle_pages_warn_only, + "%sToo many pages still idle (%lu out of %lu)", + prefix, idle_pages, total_pages); + + printf("WARNING: %sToo many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", + prefix, idle_pages, total_pages); +} + +static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; uint64_t base_gva = vcpu_args->gva; @@ -177,27 +224,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * arbitrary; high enough that we ensure most memory access went through * access tracking but low enough as to not make the test too brittle * over time and across architectures. - * - * When running the guest as a nested VM, "warn" instead of asserting - * as the TLB size is effectively unlimited and the KVM doesn't - * explicitly flush the TLB when aging SPTEs. As a result, more pages - * are cached and the guest won't see the "idle" bit cleared. */ - if (still_idle >= pages / 10) { -#ifdef __x86_64__ - TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), - "vCPU%d: Too many pages still idle (%lu out of %lu)", - vcpu_idx, still_idle, pages); -#endif - printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " - "this will affect performance results.\n", - vcpu_idx, still_idle, pages); - } + if (still_idle >= pages / 10) + too_many_idle_pages(still_idle, pages, + overlap_memory_access ? -1 : vcpu_idx); close(page_idle_fd); close(pagemap_fd); } +int find_generation(struct memcg_stats *stats, long total_pages) +{ + /* + * For finding the generation that contains our pages, use the same + * 90% threshold that page_idle uses. + */ + int gen = lru_gen_find_generation(stats, total_pages * 9 / 10); + + if (gen >= 0) + return gen; + + if (!idle_pages_warn_only) { + TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).", + total_pages * 9 / 10); + return gen; + } + + /* + * We couldn't find a generation with 90% of guest memory, which can + * happen if access tracking is unreliable. Simply look for a majority + * of pages. + */ + puts("WARNING: Couldn't find a generation with 90% of guest memory. " + "Performance results may not be accurate."); + gen = lru_gen_find_generation(stats, total_pages / 2); + TEST_ASSERT(gen >= 0, + "Could not find a generation with 50%% of guest memory (%ld pages).", + total_pages / 2); + return gen; +} + +static void lru_gen_mark_memory_idle(struct kvm_vm *vm) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + struct memcg_stats stats; + int new_gen; + + /* Make a new generation */ + clock_gettime(CLOCK_MONOTONIC, &ts_start); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + ts_elapsed = timespec_elapsed(ts_start); + + /* Check the generation again */ + new_gen = find_generation(&stats, test_pages); + + /* + * This function should only be invoked with newly-accessed pages, + * so pages should always move to a newer generation. + */ + if (new_gen <= lru_gen_last_gen) { + /* We did not move to a newer generation. */ + long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen, + &stats); + + too_many_idle_pages(min_t(long, idle_pages, test_pages), + test_pages, -1); + } + pr_info("%-30s: %ld.%09lds\n", + "Mark memory idle (lru_gen)", ts_elapsed.tv_sec, + ts_elapsed.tv_nsec); + lru_gen_last_gen = new_gen; +} + static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall) { struct ucall uc; @@ -237,9 +336,9 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) assert_ucall(vcpu, UCALL_SYNC); break; case ITERATION_MARK_IDLE: - mark_vcpu_memory_idle(vm, vcpu_args); + pageidle_mark_vcpu_memory_idle(vm, vcpu_args); break; - }; + } vcpu_last_completed_iteration[vcpu_idx] = current_iteration; } @@ -289,15 +388,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus, static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus) { + if (use_lru_gen) + return lru_gen_mark_memory_idle(vm); + /* * Even though this parallelizes the work across vCPUs, this is still a * very slow operation because page_idle forces the test to mark one pfn - * at a time and the clear_young notifier serializes on the KVM MMU + * at a time and the clear_young notifier may serialize on the KVM MMU * lock. */ pr_debug("Marking VM memory idle (slow)...\n"); iteration_work = ITERATION_MARK_IDLE; - run_iteration(vm, nr_vcpus, "Mark memory idle"); + run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)"); } static void run_test(enum vm_guest_mode mode, void *arg) @@ -309,11 +411,38 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); + /* + * If guest_page_size is larger than the host's page size, the + * guest (memstress) will only fault in a subset of the host's pages. + */ + test_pages = params->nr_vcpus * params->vcpu_memory_bytes / + max(memstress_args.guest_page_size, + (uint64_t)getpagesize()); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); + if (use_lru_gen) { + struct memcg_stats stats; + + /* + * Do a page table scan now. Following initial population, aging + * may not cause the pages to move to a newer generation. Do + * an aging pass now so that future aging passes always move + * pages to a newer generation. + */ + printf("Initial aging pass (lru_gen)\n"); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages, + "Not all pages accounted for (looking for %ld). " + "Was the memcg set up correctly?", test_pages); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory"); + lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME); + lru_gen_last_gen = find_generation(&stats, test_pages); + } + /* As a control, read and write to the populated memory first. */ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory"); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory"); @@ -328,6 +457,37 @@ static void run_test(enum vm_guest_mode mode, void *arg) memstress_destroy_vm(vm); } +static int access_tracking_unreliable(void) +{ +#ifdef __x86_64__ + /* + * When running nested, the TLB size may be effectively unlimited (for + * example, this is the case when running on KVM L0), and KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. + */ + if (this_cpu_has(X86_FEATURE_HYPERVISOR)) { + puts("Skipping idle page count sanity check, because the test is run nested"); + return 1; + } +#endif + /* + * When NUMA balancing is enabled, guest memory will be unmapped to get + * NUMA faults, dropping the Accessed bits. + */ + if (is_numa_balancing_enabled()) { + puts("Skipping idle page count sanity check, because NUMA balancing is enabled"); + return 1; + } + return 0; +} + +static int run_test_for_each_guest_mode(const char *cgroup, void *arg) +{ + for_each_guest_mode(run_test, arg); + return 0; +} + static void help(char *name) { puts(""); @@ -342,11 +502,22 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -w: Control whether the test warns or fails if more than 10%%\n" + " of pages are still seen as idle/old after accessing guest\n" + " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n" + " mode, the test fails by default, but switches to warn only\n" + " if NUMA balancing is enabled or the test detects it's running\n" + " in a VM.\n"); backing_src_help("-s"); puts(""); exit(0); } +void destroy_cgroup(char *cg) +{ + printf("Destroying cgroup: %s\n", cg); +} + int main(int argc, char *argv[]) { struct test_params params = { @@ -354,12 +525,13 @@ int main(int argc, char *argv[]) .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, .nr_vcpus = 1, }; + char *new_cg = NULL; int page_idle_fd; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -376,6 +548,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break; + case 'w': + idle_pages_warn_only = + atoi_non_negative("Idle pages warning", + optarg); + break; case 'h': default: help(argv[0]); @@ -383,12 +560,50 @@ int main(int argc, char *argv[]) } } - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "CONFIG_IDLE_PAGE_TRACKING is not enabled"); - close(page_idle_fd); + if (idle_pages_warn_only == -1) + idle_pages_warn_only = access_tracking_unreliable(); + + if (lru_gen_usable()) { + bool cg_created = true; + int ret; - for_each_guest_mode(run_test, ¶ms); + puts("Using lru_gen for aging"); + use_lru_gen = true; + + if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory")) + ksft_exit_skip("Cannot find memory cgroup controller\n"); + + new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME); + printf("Creating cgroup: %s\n", new_cg); + if (cg_create(new_cg)) { + if (errno == EEXIST) { + printf("Found existing cgroup"); + cg_created = false; + } else { + ksft_exit_skip("could not create new cgroup: %s\n", new_cg); + } + } + + /* + * This will fork off a new process to run the test within + * a new memcg, so we need to properly propagate the return + * value up. + */ + ret = cg_run(new_cg, &run_test_for_each_guest_mode, ¶ms); + if (cg_created) + cg_destroy(new_cg); + if (ret < 0) + TEST_FAIL("child did not spawn or was abnormally killed"); + if (ret) + return ret; + } else { + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); + close(page_idle_fd); + + puts("Using page_idle for aging"); + run_test_for_each_guest_mode(NULL, ¶ms); + } return 0; } diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c new file mode 100644 index 000000000000..cf8fb67104f1 --- /dev/null +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer.c - Tests the arch timer IRQ functionality + * + * The guest's main thread configures the timer interrupt and waits + * for it to fire, with a timeout equal to the timer period. + * It asserts that the timeout doesn't exceed the timer period plus + * a user configurable error margin(default to 100us) + * + * On the other hand, upon receipt of an interrupt, the guest's interrupt + * handler validates the interrupt by checking if the architectural state + * is in compliance with the specifications. + * + * The test provides command-line options to configure the timer's + * period (-p), number of vCPUs (-n), iterations per stage (-i) and timer + * interrupt arrival error margin (-e). To stress-test the timer stack + * even more, an option to migrate the vCPUs across pCPUs (-m), at a + * particular rate, is also provided. + * + * Copyright (c) 2021, Google LLC. + */ +#include <stdlib.h> +#include <pthread.h> +#include <linux/sizes.h> +#include <linux/bitmap.h> +#include <sys/sysinfo.h> + +#include "timer_test.h" +#include "ucall_common.h" + +struct test_args test_args = { + .nr_vcpus = NR_VCPUS_DEF, + .nr_iter = NR_TEST_ITERS_DEF, + .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF, + .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS, + .timer_err_margin_us = TIMER_TEST_ERR_MARGIN_US, + .reserved = 1, +}; + +struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; +struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS]; + +static pthread_t pt_vcpu_run[KVM_MAX_VCPUS]; +static unsigned long *vcpu_done_map; +static pthread_mutex_t vcpu_done_map_lock; + +static void *test_vcpu_run(void *arg) +{ + unsigned int vcpu_idx = (unsigned long)arg; + struct ucall uc; + struct kvm_vcpu *vcpu = vcpus[vcpu_idx]; + struct kvm_vm *vm = vcpu->vm; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx]; + + vcpu_run(vcpu); + + /* Currently, any exit from guest is an indication of completion */ + pthread_mutex_lock(&vcpu_done_map_lock); + __set_bit(vcpu_idx, vcpu_done_map); + pthread_mutex_unlock(&vcpu_done_map_lock); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + case UCALL_DONE: + break; + case UCALL_ABORT: + sync_global_from_guest(vm, *shared_data); + fprintf(stderr, "Guest assert failed, vcpu %u; stage; %u; iter: %u\n", + vcpu_idx, shared_data->guest_stage, shared_data->nr_iter); + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + + pr_info("PASS(vCPU-%d).\n", vcpu_idx); + + return NULL; +} + +static uint32_t test_get_pcpu(void) +{ + uint32_t pcpu; + unsigned int nproc_conf; + cpu_set_t online_cpuset; + + nproc_conf = get_nprocs_conf(); + sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset); + + /* Randomly find an available pCPU to place a vCPU on */ + do { + pcpu = rand() % nproc_conf; + } while (!CPU_ISSET(pcpu, &online_cpuset)); + + return pcpu; +} + +static int test_migrate_vcpu(unsigned int vcpu_idx) +{ + int ret; + uint32_t new_pcpu = test_get_pcpu(); + + pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); + + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); + + /* Allow the error where the vCPU thread is already finished */ + TEST_ASSERT(ret == 0 || ret == ESRCH, + "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d", + vcpu_idx, new_pcpu, ret); + + return ret; +} + +static void *test_vcpu_migration(void *arg) +{ + unsigned int i, n_done; + bool vcpu_done; + + do { + usleep(msecs_to_usecs(test_args.migration_freq_ms)); + + for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) { + pthread_mutex_lock(&vcpu_done_map_lock); + vcpu_done = test_bit(i, vcpu_done_map); + pthread_mutex_unlock(&vcpu_done_map_lock); + + if (vcpu_done) { + n_done++; + continue; + } + + test_migrate_vcpu(i); + } + } while (test_args.nr_vcpus != n_done); + + return NULL; +} + +static void test_run(struct kvm_vm *vm) +{ + pthread_t pt_vcpu_migration; + unsigned int i; + int ret; + + pthread_mutex_init(&vcpu_done_map_lock, NULL); + vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus); + TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap"); + + for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) { + ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run, + (void *)(unsigned long)i); + TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i); + } + + /* Spawn a thread to control the vCPU migrations */ + if (test_args.migration_freq_ms) { + srand(time(NULL)); + + ret = pthread_create(&pt_vcpu_migration, NULL, + test_vcpu_migration, NULL); + TEST_ASSERT(!ret, "Failed to create the migration pthread"); + } + + + for (i = 0; i < test_args.nr_vcpus; i++) + pthread_join(pt_vcpu_run[i], NULL); + + if (test_args.migration_freq_ms) + pthread_join(pt_vcpu_migration, NULL); + + bitmap_free(vcpu_done_map); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n" + "\t\t [-m migration_freq_ms] [-o counter_offset]\n" + "\t\t [-e timer_err_margin_us]\n", name); + pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n", + NR_VCPUS_DEF, KVM_MAX_VCPUS); + pr_info("\t-i: Number of iterations per stage (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n", + TIMER_TEST_PERIOD_MS_DEF); + pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n", + TIMER_TEST_MIGRATION_FREQ_MS); + pr_info("\t-o: Counter offset (in counter cycles, default: 0) [aarch64-only]\n"); + pr_info("\t-e: Interrupt arrival error margin (in us) of the guest timer (default: %u)\n", + TIMER_TEST_ERR_MARGIN_US); + pr_info("\t-h: print this help screen\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "hn:i:p:m:o:e:")) != -1) { + switch (opt) { + case 'n': + test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); + if (test_args.nr_vcpus > KVM_MAX_VCPUS) { + pr_info("Max allowed vCPUs: %u\n", + KVM_MAX_VCPUS); + goto err; + } + break; + case 'i': + test_args.nr_iter = atoi_positive("Number of iterations", optarg); + break; + case 'p': + test_args.timer_period_ms = atoi_positive("Periodicity", optarg); + break; + case 'm': + test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); + break; + case 'e': + test_args.timer_err_margin_us = atoi_non_negative("Error Margin", optarg); + break; + case 'o': + test_args.counter_offset = strtol(optarg, NULL, 0); + test_args.reserved = 0; + break; + case 'h': + default: + goto err; + } + } + + return true; + +err: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2, + "At least two physical CPUs needed for vCPU migration"); + + vm = test_vm_create(); + test_run(vm); + test_vm_cleanup(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index 4951ac53d1f8..713005b6f508 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -97,8 +97,8 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_wi_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); /* * Expect the ioctl to succeed with no effect on the register @@ -106,8 +106,8 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) */ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); - vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); } } @@ -126,15 +126,15 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) uint64_t reg_id = raz_invariant_reg_ids[i]; uint64_t val; - vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); - vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + val = vcpu_get_reg(vcpu, reg_id); + TEST_ASSERT_EQ(val, 0); } } @@ -144,10 +144,10 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { uint64_t val, el0; - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val); - return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); + return el0 == ID_AA64PFR0_EL1_EL0_IMP; } int main(void) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c new file mode 100644 index 000000000000..d592a4515399 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The test validates both the virtual and physical timer IRQs using + * CVAL and TVAL registers. + * + * Copyright (c) 2021, Google LLC. + */ +#include "arch_timer.h" +#include "delay.h" +#include "gic.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" +#include "vgic.h" + +enum guest_stage { + GUEST_STAGE_VTIMER_CVAL = 1, + GUEST_STAGE_VTIMER_TVAL, + GUEST_STAGE_PTIMER_CVAL, + GUEST_STAGE_PTIMER_TVAL, + GUEST_STAGE_MAX, +}; + +static int vtimer_irq, ptimer_irq; + +static void +guest_configure_timer_action(struct test_vcpu_shared_data *shared_data) +{ + switch (shared_data->guest_stage) { + case GUEST_STAGE_VTIMER_CVAL: + timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(VIRTUAL); + timer_set_ctl(VIRTUAL, CTL_ENABLE); + break; + case GUEST_STAGE_VTIMER_TVAL: + timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(VIRTUAL); + timer_set_ctl(VIRTUAL, CTL_ENABLE); + break; + case GUEST_STAGE_PTIMER_CVAL: + timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(PHYSICAL); + timer_set_ctl(PHYSICAL, CTL_ENABLE); + break; + case GUEST_STAGE_PTIMER_TVAL: + timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms); + shared_data->xcnt = timer_get_cntct(PHYSICAL); + timer_set_ctl(PHYSICAL, CTL_ENABLE); + break; + default: + GUEST_ASSERT(0); + } +} + +static void guest_validate_irq(unsigned int intid, + struct test_vcpu_shared_data *shared_data) +{ + enum guest_stage stage = shared_data->guest_stage; + uint64_t xcnt = 0, xcnt_diff_us, cval = 0; + unsigned long xctl = 0; + unsigned int timer_irq = 0; + unsigned int accessor; + + if (intid == IAR_SPURIOUS) + return; + + switch (stage) { + case GUEST_STAGE_VTIMER_CVAL: + case GUEST_STAGE_VTIMER_TVAL: + accessor = VIRTUAL; + timer_irq = vtimer_irq; + break; + case GUEST_STAGE_PTIMER_CVAL: + case GUEST_STAGE_PTIMER_TVAL: + accessor = PHYSICAL; + timer_irq = ptimer_irq; + break; + default: + GUEST_ASSERT(0); + return; + } + + xctl = timer_get_ctl(accessor); + if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE)) + return; + + timer_set_ctl(accessor, CTL_IMASK); + xcnt = timer_get_cntct(accessor); + cval = timer_get_cval(accessor); + + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, timer_irq); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(xcnt >= cval, + "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx", + xcnt, cval, xcnt_diff_us); + __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl); + + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + guest_validate_irq(intid, shared_data); + + gic_set_eoi(intid); +} + +static void guest_run_stage(struct test_vcpu_shared_data *shared_data, + enum guest_stage stage) +{ + uint32_t irq_iter, config_iter; + + shared_data->guest_stage = stage; + shared_data->nr_iter = 0; + + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + /* Setup the next interrupt */ + guest_configure_timer_action(shared_data); + + /* Setup a timeout for the interrupt to arrive */ + udelay(msecs_to_usecs(test_args.timer_period_ms) + + test_args.timer_err_margin_us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + local_irq_disable(); + + gic_init(GIC_V3, test_args.nr_vcpus); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL); + guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL); + guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL); + guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL); + + GUEST_DONE(); +} + +static void test_init_timer_irq(struct kvm_vm *vm) +{ + /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ + ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]); + vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + unsigned int i; + int nr_vcpus = test_args.nr_vcpus; + + TEST_REQUIRE(kvm_supports_vgic_v3()); + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + if (!test_args.reserved) { + if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) { + struct kvm_arm_counter_offset offset = { + .counter_offset = test_args.counter_offset, + .reserved = 0, + }; + vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset); + } else + TEST_FAIL("no support for global offset"); + } + + for (i = 0; i < nr_vcpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + test_init_timer_irq(vm); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c new file mode 100644 index 000000000000..993c9e38e729 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -0,0 +1,1059 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. + * + * The test validates some edge cases related to the arch-timer: + * - timers above the max TVAL value. + * - timers in the past + * - moving counters ahead and behind pending timers. + * - reprograming timers. + * - timers fired multiple times. + * - masking/unmasking using the timer control mask. + * + * Copyright (c) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <sys/sysinfo.h> + +#include "arch_timer.h" +#include "gic.h" +#include "vgic.h" + +/* Depends on counter width. */ +static uint64_t CVAL_MAX; +/* tval is a signed 32-bit int. */ +static const int32_t TVAL_MAX = INT32_MAX; +static const int32_t TVAL_MIN = INT32_MIN; + +/* After how much time we say there is no IRQ. */ +static const uint32_t TIMEOUT_NO_IRQ_US = 50000; + +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static uint64_t DEF_CNT; + +/* Number of runs. */ +static const uint32_t NR_TEST_ITERS_DEF = 5; + +/* Default wait test time in ms. */ +static const uint32_t WAIT_TEST_MS = 10; + +/* Default "long" wait test time in ms. */ +static const uint32_t LONG_WAIT_TEST_MS = 100; + +/* Shared with IRQ handler. */ +struct test_vcpu_shared_data { + atomic_t handled; + atomic_t spurious; +} shared_data; + +struct test_args { + /* Virtual or physical timer and counter tests. */ + enum arch_timer timer; + /* Delay used for most timer tests. */ + uint64_t wait_ms; + /* Delay used in the test_long_timer_delays test. */ + uint64_t long_wait_ms; + /* Number of iterations. */ + int iterations; + /* Whether to test the physical timer. */ + bool test_physical; + /* Whether to test the virtual timer. */ + bool test_virtual; +}; + +struct test_args test_args = { + .wait_ms = WAIT_TEST_MS, + .long_wait_ms = LONG_WAIT_TEST_MS, + .iterations = NR_TEST_ITERS_DEF, + .test_physical = true, + .test_virtual = true, +}; + +static int vtimer_irq, ptimer_irq; + +enum sync_cmd { + SET_COUNTER_VALUE, + USERSPACE_USLEEP, + USERSPACE_SCHED_YIELD, + USERSPACE_MIGRATE_SELF, + NO_USERSPACE_CMD, +}; + +typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); + +static void sleep_poll(enum arch_timer timer, uint64_t usec); +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); +static void sleep_migrate(enum arch_timer timer, uint64_t usec); + +sleep_method_t sleep_method[] = { + sleep_poll, + sleep_sched_poll, + sleep_migrate, + sleep_in_userspace, +}; + +typedef void (*irq_wait_method_t)(void); + +static void wait_for_non_spurious_irq(void); +static void wait_poll_for_irq(void); +static void wait_sched_poll_for_irq(void); +static void wait_migrate_poll_for_irq(void); + +irq_wait_method_t irq_wait_method[] = { + wait_for_non_spurious_irq, + wait_poll_for_irq, + wait_sched_poll_for_irq, + wait_migrate_poll_for_irq, +}; + +enum timer_view { + TIMER_CVAL, + TIMER_TVAL, +}; + +static void assert_irqs_handled(uint32_t n) +{ + int h = atomic_read(&shared_data.handled); + + __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); +} + +static void userspace_cmd(uint64_t cmd) +{ + GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); +} + +static void userspace_migrate_vcpu(void) +{ + userspace_cmd(USERSPACE_MIGRATE_SELF); +} + +static void userspace_sleep(uint64_t usecs) +{ + GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); +} + +static void set_counter(enum arch_timer timer, uint64_t counter) +{ + GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + enum arch_timer timer; + uint64_t cnt, cval; + uint32_t ctl; + bool timer_condition, istatus; + + if (intid == IAR_SPURIOUS) { + atomic_inc(&shared_data.spurious); + goto out; + } + + if (intid == ptimer_irq) + timer = PHYSICAL; + else if (intid == vtimer_irq) + timer = VIRTUAL; + else + goto out; + + ctl = timer_get_ctl(timer); + cval = timer_get_cval(timer); + cnt = timer_get_cntct(timer); + timer_condition = cnt >= cval; + istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); + GUEST_ASSERT_EQ(timer_condition, istatus); + + /* Disable and mask the timer. */ + timer_set_ctl(timer, CTL_IMASK); + + atomic_inc(&shared_data.handled); + +out: + gic_set_eoi(intid); +} + +static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_cval(timer, cval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, + enum timer_view tv) +{ + switch (tv) { + case TIMER_CVAL: + set_cval_irq(timer, xval, ctl); + break; + case TIMER_TVAL: + set_tval_irq(timer, xval, ctl); + break; + default: + GUEST_FAIL("Could not get timer %d", timer); + } +} + +/* + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void wait_for_non_spurious_irq(void) +{ + int h; + + local_irq_disable(); + + for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) { + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); + } +} + +/* + * Wait for an non-spurious IRQ by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD). + * + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd) +{ + int h; + + local_irq_disable(); + + h = atomic_read(&shared_data.handled); + + local_irq_enable(); + while (h == atomic_read(&shared_data.handled)) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } + local_irq_disable(); +} + +static void wait_poll_for_irq(void) +{ + poll_for_non_spurious_irq(NO_USERSPACE_CMD); +} + +static void wait_sched_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD); +} + +static void wait_migrate_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF); +} + +/* + * Sleep for usec microseconds by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE). + */ +static void guest_poll(enum arch_timer test_timer, uint64_t usec, + enum sync_cmd usp_cmd) +{ + uint64_t cycles = usec_to_cycles(usec); + /* Whichever timer we are testing with, sleep with the other. */ + enum arch_timer sleep_timer = 1 - test_timer; + uint64_t start = timer_get_cntct(sleep_timer); + + while ((timer_get_cntct(sleep_timer) - start) < cycles) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } +} + +static void sleep_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, NO_USERSPACE_CMD); +} + +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_SCHED_YIELD); +} + +static void sleep_migrate(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); +} + +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +{ + userspace_sleep(usec); +} + +/* + * Reset the timer state to some nice values like the counter not being close + * to the edge, and the control register masked and disabled. + */ +static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +{ + set_counter(timer, cnt); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timer_xval(enum arch_timer timer, uint64_t xval, + enum timer_view tv, irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + local_irq_disable(); + + if (reset_state) + reset_timer_state(timer, reset_cnt); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * The test_timer_* functions will program the timer, wait for it, and assert + * the firing of the correct IRQ. + * + * These functions don't have a timeout and return as soon as they receive an + * IRQ. They can hang (forever), so we rely on having a timeout mechanism in + * the "runner", like: tools/testing/selftests/kselftest/runner.sh. + */ + +static void test_timer_cval(enum arch_timer timer, uint64_t cval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); +} + +static void test_timer_tval(enum arch_timer timer, int32_t tval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + reset_cnt); +} + +static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, + uint64_t usec, enum timer_view timer_view, + sleep_method_t guest_sleep) +{ + local_irq_disable(); + + set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); + guest_sleep(timer, usec); + + local_irq_enable(); + isb(); + + /* Assume success (no IRQ) after waiting usec microseconds */ + assert_irqs_handled(0); +} + +static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, + uint64_t usec, sleep_method_t wm) +{ + test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); +} + +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, + sleep_method_t wm) +{ + /* tval will be cast to an int32_t in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); +} + +/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ +static void test_timer_control_mask_then_unmask(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Unmask the timer, and then get an IRQ. */ + local_irq_disable(); + timer_set_ctl(timer, CTL_ENABLE); + /* This method re-enables IRQs to handle the one we're looking for. */ + wait_for_non_spurious_irq(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Check that timer control masks actually mask a timer being fired. */ +static void test_timer_control_masks(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + + /* Local IRQs are not masked at this point. */ + + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + sleep_poll(timer, TIMEOUT_NO_IRQ_US); + + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_fire_a_timer_multiple_times(enum arch_timer timer, + irq_wait_method_t wm, int num) +{ + int i; + + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_tval_irq(timer, 0, CTL_ENABLE); + + for (i = 1; i <= num; i++) { + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ handler masked and disabled the timer. + * Enable and unmmask it again. + */ + timer_set_ctl(timer, CTL_ENABLE); + + assert_irqs_handled(i); + } + + local_irq_enable(); +} + +static void test_timers_fired_multiple_times(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) + test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10); +} + +/* + * Set a timer for tval=delta_1_ms then reprogram it to + * tval=delta_2_ms. Check that we get the timer fired. There is no + * timeout for the wait: we use the wfi instruction. + */ +static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, + int32_t delta_1_ms, int32_t delta_2_ms) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + /* Program the timer to DEF_CNT + delta_1_ms. */ + set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE); + + /* Reprogram the timer to DEF_CNT + delta_2_ms. */ + timer_set_tval(timer, msec_to_cycles(delta_2_ms)); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */ + GUEST_ASSERT(timer_get_cntct(timer) >= + DEF_CNT + msec_to_cycles(delta_2_ms)); + + local_irq_enable(); + assert_irqs_handled(1); +}; + +static void test_reprogram_timers(enum arch_timer timer) +{ + int i; + uint64_t base_wait = test_args.wait_ms; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* + * Ensure reprogramming works whether going from a + * longer time to a shorter or vice versa. + */ + test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait, + base_wait); + test_reprogramming_timer(timer, irq_wait_method[i], base_wait, + 2 * base_wait); + } +} + +static void test_basic_functionality(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +/* + * This test checks basic timer behavior without actually firing timers, things + * like: the relationship between cval and tval, tval down-counting. + */ +static void timers_sanity_checks(enum arch_timer timer, bool use_sched) +{ + reset_timer_state(timer, DEF_CNT); + + local_irq_disable(); + + /* cval in the past */ + timer_set_cval(timer, + timer_get_cntct(timer) - + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + /* tval in the past */ + timer_set_tval(timer, -1); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer)); + + /* tval larger than TVAL_MAX. This requires programming with + * timer_set_cval instead so the value is expressible + */ + timer_set_cval(timer, + timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= 0); + + /* + * tval larger than 2 * TVAL_MAX. + * Twice the TVAL_MAX completely loops around the TVAL. + */ + timer_set_cval(timer, + timer_get_cntct(timer) + 2ULL * TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= + msec_to_cycles(test_args.wait_ms)); + + /* negative tval that rollovers from 0. */ + set_counter(timer, msec_to_cycles(1)); + timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); + + /* tval should keep down-counting from 0 to -1. */ + timer_set_tval(timer, 0); + sleep_poll(timer, 1); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + local_irq_enable(); + + /* Mask and disable any pending timer. */ + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timers_sanity_checks(enum arch_timer timer) +{ + timers_sanity_checks(timer, false); + /* Check how KVM saves/restores these edge-case values. */ + timers_sanity_checks(timer, true); +} + +static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_cval_irq(timer, + (uint64_t) TVAL_MAX + + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); + + set_counter(timer, TVAL_MAX); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ +static void test_timers_above_tval_max(enum arch_timer timer) +{ + uint64_t cval; + int i; + + /* + * Test that the system is not implementing cval in terms of + * tval. If that was the case, setting a cval to "cval = now + * + TVAL_MAX + wait_ms" would wrap to "cval = now + + * wait_ms", and the timer would fire immediately. Test that it + * doesn't. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + reset_timer_state(timer, DEF_CNT); + cval = timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms); + test_cval_no_irq(timer, cval, + msecs_to_usecs(test_args.wait_ms) + + TIMEOUT_NO_IRQ_US, sleep_method[i]); + } + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* Get the IRQ by moving the counter forward. */ + test_set_cnt_after_tval_max(timer, irq_wait_method[i]); + } +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, + uint64_t xval, uint64_t cnt_2, + irq_wait_method_t wm, enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t xval, + uint64_t cnt_2, + sleep_method_t guest_sleep, + enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + guest_sleep(timer, TIMEOUT_NO_IRQ_US); + + local_irq_enable(); + isb(); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, + int32_t tval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); +} + +static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, + uint64_t cval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); +} + +static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, + uint64_t cnt_1, int32_t tval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, + TIMER_TVAL); +} + +static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t cval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, + TIMER_CVAL); +} + +/* Set a timer and then move the counter ahead of it. */ +static void test_move_counters_ahead_of_timers(enum arch_timer timer) +{ + int i; + int32_t tval; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); + test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); + + /* Move counter ahead of negative tval. */ + test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); + test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); + tval = TVAL_MAX; + test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, + wm); + } +} + +/* + * Program a timer, mask it, and then change the tval or counter to cancel it. + * Unmask it and check that nothing fires. + */ +static void test_move_counters_behind_timers(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, + sm); + test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); + } +} + +static void test_timers_in_the_past(enum arch_timer timer) +{ + int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval; + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + /* set a timer wait_ms the past. */ + cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + + /* Set a timer to counter=0 (in the past) */ + test_timer_cval(timer, 0, wm, true, DEF_CNT); + + /* Set a time for tval=0 (now) */ + test_timer_tval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer to as far in the past as possible */ + test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); + } + + /* + * Set the counter to wait_ms, and a tval to -wait_ms. There should be no + * IRQ as that tval means cval=CVAL_MAX-wait_ms. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + set_counter(timer, msec_to_cycles(test_args.wait_ms)); + test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); + } +} + +static void test_long_timer_delays(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +static void guest_run_iteration(enum arch_timer timer) +{ + test_basic_functionality(timer); + test_timers_sanity_checks(timer); + + test_timers_above_tval_max(timer); + test_timers_in_the_past(timer); + + test_move_counters_ahead_of_timers(timer); + test_move_counters_behind_timers(timer); + test_reprogram_timers(timer); + + test_timers_fired_multiple_times(timer); + + test_timer_control_mask_then_unmask(timer); + test_timer_control_masks(timer); +} + +static void guest_code(enum arch_timer timer) +{ + int i; + + local_irq_disable(); + + gic_init(GIC_V3, 1); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + for (i = 0; i < test_args.iterations; i++) { + GUEST_SYNC(i); + guest_run_iteration(timer); + } + + test_long_timer_delays(timer); + GUEST_DONE(); +} + +static cpu_set_t default_cpuset; + +static uint32_t next_pcpu(void) +{ + uint32_t max = get_nprocs(); + uint32_t cur = sched_getcpu(); + uint32_t next = cur; + cpu_set_t cpuset = default_cpuset; + + TEST_ASSERT(max > 1, "Need at least two physical cpus"); + + do { + next = (next + 1) % CPU_SETSIZE; + } while (!CPU_ISSET(next, &cpuset)); + + return next; +} + +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, + enum arch_timer timer) +{ + if (timer == PHYSICAL) + vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); + else + vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + enum sync_cmd cmd = uc->args[1]; + uint64_t val = uc->args[2]; + enum arch_timer timer = uc->args[3]; + + switch (cmd) { + case SET_COUNTER_VALUE: + kvm_set_cntxct(vcpu, val, timer); + break; + case USERSPACE_USLEEP: + usleep(val); + break; + case USERSPACE_SCHED_YIELD: + sched_yield(); + break; + case USERSPACE_MIGRATE_SELF: + pin_self_to_cpu(next_pcpu()); + break; + default: + break; + } +} + +static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + /* Start on CPU 0 */ + pin_self_to_cpu(0); + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(vcpu, &uc); + break; + case UCALL_DONE: + goto out; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto out; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + out: + return; +} + +static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + ptimer_irq = vcpu_get_ptimer_irq(vcpu); + vtimer_irq = vcpu_get_vtimer_irq(vcpu); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, + enum arch_timer timer) +{ + *vm = vm_create_with_one_vcpu(vcpu, guest_code); + TEST_ASSERT(*vm, "Failed to create the test VM\n"); + + vm_init_descriptor_tables(*vm); + vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, + guest_irq_handler); + + vcpu_init_descriptor_tables(*vcpu); + vcpu_args_set(*vcpu, 1, timer); + + test_init_timer_irq(*vm, *vcpu); + + sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); +} + +static void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" + , name); + pr_info("\t-i: Number of iterations (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); + pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", + LONG_WAIT_TEST_MS); + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", + WAIT_TEST_MS); + pr_info("\t-p: Test physical timer (default: true)\n"); + pr_info("\t-v: Test virtual timer (default: true)\n"); + pr_info("\t-h: Print this help message\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { + switch (opt) { + case 'b': + test_args.test_physical = true; + test_args.test_virtual = true; + break; + case 'i': + test_args.iterations = + atoi_positive("Number of iterations", optarg); + break; + case 'l': + test_args.long_wait_ms = + atoi_positive("Long wait time", optarg); + break; + case 'p': + test_args.test_physical = true; + test_args.test_virtual = false; + break; + case 'v': + test_args.test_virtual = true; + test_args.test_physical = false; + break; + case 'w': + test_args.wait_ms = atoi_positive("Wait time", optarg); + break; + case 'h': + default: + goto err; + } + } + + return true; + + err: + test_print_help(argv[0]); + return false; +} + +static void set_counter_defaults(void) +{ + const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + uint64_t freq = read_sysreg(CNTFRQ_EL0); + int width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_supports_vgic_v3()); + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); + + if (test_args.test_virtual) { + test_vm_create(&vm, &vcpu, VIRTUAL); + test_run(vm, vcpu); + test_vm_cleanup(vm); + } + + if (test_args.test_physical) { + test_vm_create(&vm, &vcpu, PHYSICAL); + test_run(vm, vcpu); + test_vm_cleanup(vm); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c new file mode 100644 index 000000000000..c8ee6f520734 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/at.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes. + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +#define TEST_ADDR 0x80000000 + +enum { + CLEAR_ACCESS_FLAG, + TEST_ACCESS_FLAG, +}; + +static u64 *ptep_hva; + +#define copy_el2_to_el1(reg) \ + write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12) + +/* Yes, this is an ugly hack */ +#define __at(op, addr) write_sysreg_s(addr, op) + +#define test_at_insn(op, expect_fault) \ +do { \ + u64 par, fsc; \ + bool fault; \ + \ + GUEST_SYNC(CLEAR_ACCESS_FLAG); \ + \ + __at(OP_AT_##op, TEST_ADDR); \ + isb(); \ + par = read_sysreg(par_el1); \ + \ + fault = par & SYS_PAR_EL1_F; \ + fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \ + \ + __GUEST_ASSERT((expect_fault) == fault, \ + "AT "#op": %sexpected fault (par: %lx)1", \ + (expect_fault) ? "" : "un", par); \ + if ((expect_fault)) { \ + __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \ + "AT "#op": expected access flag fault (par: %lx)", \ + par); \ + } else { \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \ + GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \ + GUEST_SYNC(TEST_ACCESS_FLAG); \ + } \ +} while (0) + +static void test_at(bool expect_fault) +{ + test_at_insn(S1E2R, expect_fault); + test_at_insn(S1E2W, expect_fault); + + /* Reuse the stage-1 MMU context from EL2 at EL1 */ + copy_el2_to_el1(SCTLR); + copy_el2_to_el1(MAIR); + copy_el2_to_el1(TCR); + copy_el2_to_el1(TTBR0); + copy_el2_to_el1(TTBR1); + + /* Disable stage-2 translation and enter a non-host context */ + write_sysreg(0, vtcr_el2); + write_sysreg(0, vttbr_el2); + sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0); + isb(); + + test_at_insn(S1E1R, expect_fault); + test_at_insn(S1E1W, expect_fault); +} + +static void guest_code(void) +{ + sysreg_clear_set(tcr_el1, TCR_HA, 0); + isb(); + + test_at(true); + + if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1))) + GUEST_DONE(); + + /* + * KVM's software PTW makes the implementation choice that the AT + * instruction sets the access flag. + */ + sysreg_clear_set(tcr_el1, 0, TCR_HA); + isb(); + test_at(false); + + GUEST_DONE(); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + switch (uc->args[1]) { + case CLEAR_ACCESS_FLAG: + /* + * Delete + reinstall the memslot to invalidate stage-2 + * mappings of the stage-1 page tables, forcing KVM to + * use the 'slow' AT emulation path. + * + * This and clearing the access flag from host userspace + * ensures that the access flag cannot be set speculatively + * and is reliably cleared at the time of the AT instruction. + */ + clear_bit(__ffs(PTE_AF), ptep_hva); + vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]); + break; + case TEST_ACCESS_FLAG: + TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva), + "Expected access flag to be set (desc: %lu)", *ptep_hva); + break; + default: + TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]); + } +} + +static void run_test(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + return; + case UCALL_SYNC: + handle_sync(vcpu, &uc); + continue; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + virt_map(vm, TEST_ADDR, TEST_ADDR, 1); + ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3); + run_test(vcpu); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index 637be796086f..1d431de8729c 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0); + brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0); + wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs) static void guest_ss_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(ss_idx < 4, ss_idx); + __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx); ss_addr[ss_idx++] = regs->pc; regs->pstate |= SPSR_SS; } @@ -410,15 +410,15 @@ static void guest_code_ss(int test_cnt) /* Userspace disables Single Step when the end is nigh. */ asm volatile("iter_ss_end:\n"); - GUEST_ASSERT(bvr == w_bvr); - GUEST_ASSERT(wvr == w_wvr); + GUEST_ASSERT_EQ(bvr, w_bvr); + GUEST_ASSERT_EQ(wvr, w_wvr); } GUEST_DONE(); } static int debug_version(uint64_t id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); + return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) @@ -433,15 +433,15 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_BRK_INS, guest_sw_bp_handler); + ESR_ELx_EC_BRK64, guest_sw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_WP_CURRENT, guest_wp_handler); + ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SSTEP_CURRENT, guest_ss_handler); + ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SVC64, guest_svc_handler); + ESR_ELx_EC_SVC64, guest_svc_handler); /* Specify bpn/wpn/ctx_bpn to be tested */ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); @@ -450,7 +450,7 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_run(vcpu); switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; @@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt) TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG"); /* Check if the current pc is expected. */ - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); TEST_ASSERT(!test_pc || pc == test_pc, "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); @@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0) int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; + brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1; + wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); @@ -583,7 +583,7 @@ int main(int argc, char *argv[]) uint64_t aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0); + aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); __TEST_REQUIRE(debug_version(aa64dfr0) >= 6, "Armv8 debug architecture not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..d8fe17a6cc59 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_sea_s1ptw_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); + + GUEST_DONE(); +} + +static noinline void test_s1ptw_abort_guest(void) +{ + extern char test_s1ptw_abort_insn; + + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); + + asm volatile("test_s1ptw_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("Load on S1PTW abort should not retire"); +} + +static void test_s1ptw_abort(void) +{ + struct kvm_vcpu *vcpu; + u64 *ptep, bad_pa; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, + expect_sea_s1ptw_handler); + + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); + bad_pa = BIT(vm->pa_bits) - vm->page_size; + + *ptep &= ~GENMASK(47, 12); + *ptep |= bad_pa; + + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). + */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_amo_guest(void) +{ + /* + * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked) + * since the write is redirected to memory. But don't write (intentionally) + * broken code! + */ + sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0); + isb(); + + GUEST_SYNC(0); + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + /* + * KVM treats the effective value of AMO as 1 when + * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when + * unmasked. + */ + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror_amo(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); + test_s1ptw_abort(); + + if (!test_supports_el2()) + return 0; + + test_serror_amo(); +} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 4f10055af2aa..0a3a94c4cca1 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -4,49 +4,22 @@ * * Copyright (C) 2020, Red Hat, Inc. * - * When attempting to migrate from a host with an older kernel to a host - * with a newer kernel we allow the newer kernel on the destination to - * list new registers with get-reg-list. We assume they'll be unused, at - * least until the guest reboots, and so they're relatively harmless. - * However, if the destination host with the newer kernel is missing - * registers which the source host with the older kernel has, then that's - * a regression in get-reg-list. This test checks for that regression by - * checking the current list against a blessed list. We should never have - * missing registers, but if new ones appear then they can probably be - * added to the blessed list. A completely new blessed list can be created - * by running the test with the --list command line argument. - * - * Note, the blessed list should be created from the oldest possible - * kernel. We can't go older than v4.15, though, because that's the first - * release to expose the ID system registers in KVM_GET_REG_LIST, see - * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features - * from guests"). Also, one must use the --core-reg-fixup command line - * option when running on an older kernel that doesn't include df205b5c6328 - * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + * While the blessed list should be created from the oldest possible + * kernel, we can't go older than v5.2, though, because that's the first + * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid + * core register IDs in KVM_GET_REG_LIST"). Without that commit the core + * registers won't match expectations. */ #include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/wait.h> #include "kvm_util.h" #include "test_util.h" #include "processor.h" -static struct kvm_reg_list *reg_list; -static __u64 *blessed_reg, blessed_n; - -struct reg_sublist { - const char *name; - long capability; - int feature; - bool finalize; - __u64 *regs; - __u64 regs_n; - __u64 *rejects_set; - __u64 rejects_set_n; -}; +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) struct feature_id_reg { __u64 reg; @@ -55,91 +28,51 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, - 1 - } -}; - -struct vcpu_config { - char *name; - struct reg_sublist sublists[]; -}; - -static struct vcpu_config *vcpu_configs[]; -static int vcpu_configs_n; - -#define for_each_sublist(c, s) \ - for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) - -#define for_each_reg(i) \ - for ((i) = 0; (i) < reg_list->n; ++(i)) - -#define for_each_reg_filtered(i) \ - for_each_reg(i) \ - if (!filter_reg(reg_list->reg[i])) - -#define for_each_missing_reg(i) \ - for ((i) = 0; (i) < blessed_n; ++(i)) \ - if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \ - if (check_supported_feat_reg(vcpu, blessed_reg[i])) +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v -#define for_each_new_reg(i) \ - for_each_reg_filtered(i) \ - if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) - -static const char *config_name(struct vcpu_config *c) -{ - struct reg_sublist *s; - int len = 0; - - if (c->name) - return c->name; - - for_each_sublist(c, s) - len += strlen(s->name) + 1; - - c->name = malloc(len); - - len = 0; - for_each_sublist(c, s) { - if (!strcmp(s->name, "base")) - continue; - strcat(c->name + len, s->name); - len += strlen(s->name) + 1; - c->name[len - 1] = '+'; +#define REG_FEAT(r, id, f, v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } - c->name[len - 1] = '\0'; - - return c->name; -} - -static bool has_cap(struct vcpu_config *c, long capability) -{ - struct reg_sublist *s; - for_each_sublist(c, s) - if (s->capability == capability) - return true; - return false; -} +static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), + REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), + REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), +}; -static bool filter_reg(__u64 reg) +bool filter_reg(__u64 reg) { /* * DEMUX register presence depends on the host's CLIDR_EL1. @@ -151,16 +84,6 @@ static bool filter_reg(__u64 reg) return false; } -static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) -{ - int i; - - for (i = 0; i < nr_regs; ++i) - if (reg == regs[i]) - return true; - return false; -} - static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) { int i, ret; @@ -180,17 +103,27 @@ static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) return true; } -static const char *str_with_index(const char *template, __u64 index) +bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) { - char *str, *p; - int n; + return check_supported_feat_reg(vcpu, reg); +} + +bool check_reject_set(int err) +{ + return err == EPERM; +} - str = strdup(template); - p = strstr(str, "##"); - n = sprintf(p, "%lld", index); - strcat(p + n, strstr(template, "##") + 2); +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int feature; - return (const char *)str; + for_each_sublist(c, s) { + if (s->finalize) { + feature = s->feature; + vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); + } + } } #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) @@ -199,7 +132,7 @@ static const char *str_with_index(const char *template, __u64 index) #define CORE_SPSR_XX_NR_WORDS 2 #define CORE_FPREGS_XX_NR_WORDS 4 -static const char *core_id_to_str(struct vcpu_config *c, __u64 id) +static const char *core_id_to_str(const char *prefix, __u64 id) { __u64 core_off = id & ~REG_MASK, idx; @@ -210,8 +143,8 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id) case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... KVM_REG_ARM_CORE_REG(regs.regs[30]): idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; - TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx); case KVM_REG_ARM_CORE_REG(regs.sp): return "KVM_REG_ARM_CORE_REG(regs.sp)"; case KVM_REG_ARM_CORE_REG(regs.pc): @@ -225,24 +158,24 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id) case KVM_REG_ARM_CORE_REG(spsr[0]) ... KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; - TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; - TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; } - TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id); + TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); return NULL; } -static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) +static const char *sve_id_to_str(const char *prefix, __u64 id) { __u64 sve_off, n, i; @@ -252,37 +185,37 @@ static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); - TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id); + TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id); switch (sve_off) { case KVM_REG_ARM64_SVE_ZREG_BASE ... KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), - "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id); - return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n); case KVM_REG_ARM64_SVE_PREG_BASE ... KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), - "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id); - return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n); case KVM_REG_ARM64_SVE_FFR_BASE: TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), - "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id); return "KVM_REG_ARM64_SVE_FFR(0)"; } return NULL; } -static void print_reg(struct vcpu_config *c, __u64 id) +void print_reg(const char *prefix, __u64 id) { unsigned op0, op1, crn, crm, op2; const char *reg_size = NULL; TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, - "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id); + "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id); switch (id & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U8: @@ -314,16 +247,16 @@ static void print_reg(struct vcpu_config *c, __u64 id) break; default: TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", - config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); } switch (id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id)); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id)); break; case KVM_REG_ARM_DEMUX: TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), - "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); break; @@ -334,370 +267,34 @@ static void print_reg(struct vcpu_config *c, __u64 id) crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), - "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id); printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); break; case KVM_REG_ARM_FW: TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), - "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM_FW_FEAT_BMAP: TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff), - "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - if (has_cap(c, KVM_CAP_ARM_SVE)) - printf("\t%s,\n", sve_id_to_str(c, id)); - else - TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id); + printf("\t%s,\n", sve_id_to_str(prefix, id)); break; default: TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", - config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); } } /* - * Older kernels listed each 32-bit word of CORE registers separately. - * For 64 and 128-bit registers we need to ignore the extra words. We - * also need to fixup the sizes, because the older kernels stated all - * registers were 64-bit, even when they weren't. - */ -static void core_reg_fixup(void) -{ - struct kvm_reg_list *tmp; - __u64 id, core_off; - int i; - - tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); - - for (i = 0; i < reg_list->n; ++i) { - id = reg_list->reg[i]; - - if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { - tmp->reg[tmp->n++] = id; - continue; - } - - core_off = id & ~REG_MASK; - - switch (core_off) { - case 0x52: case 0xd2: case 0xd6: - /* - * These offsets are pointing at padding. - * We need to ignore them too. - */ - continue; - case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... - KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): - if (core_off & 3) - continue; - id &= ~KVM_REG_SIZE_MASK; - id |= KVM_REG_SIZE_U128; - tmp->reg[tmp->n++] = id; - continue; - case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): - case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): - id &= ~KVM_REG_SIZE_MASK; - id |= KVM_REG_SIZE_U32; - tmp->reg[tmp->n++] = id; - continue; - default: - if (core_off & 1) - continue; - tmp->reg[tmp->n++] = id; - break; - } - } - - free(reg_list); - reg_list = tmp; -} - -static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) - if (s->capability) - init->features[s->feature / 32] |= 1 << (s->feature % 32); -} - -static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c) -{ - struct reg_sublist *s; - int feature; - - for_each_sublist(c, s) { - if (s->finalize) { - feature = s->feature; - vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); - } - } -} - -static void check_supported(struct vcpu_config *c) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) { - if (!s->capability) - continue; - - __TEST_REQUIRE(kvm_has_cap(s->capability), - "%s: %s not available, skipping tests\n", - config_name(c), s->name); - } -} - -static bool print_list; -static bool print_filtered; -static bool fixup_core_regs; - -static void run_test(struct vcpu_config *c) -{ - struct kvm_vcpu_init init = { .target = -1, }; - int new_regs = 0, missing_regs = 0, i, n; - int failed_get = 0, failed_set = 0, failed_reject = 0; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct reg_sublist *s; - - check_supported(c); - - vm = vm_create_barebones(); - prepare_vcpu_init(c, &init); - vcpu = __vm_vcpu_add(vm, 0); - aarch64_vcpu_setup(vcpu, &init); - finalize_vcpu(vcpu, c); - - reg_list = vcpu_get_reg_list(vcpu); - - if (fixup_core_regs) - core_reg_fixup(); - - if (print_list || print_filtered) { - putchar('\n'); - for_each_reg(i) { - __u64 id = reg_list->reg[i]; - if ((print_list && !filter_reg(id)) || - (print_filtered && filter_reg(id))) - print_reg(c, id); - } - putchar('\n'); - return; - } - - /* - * We only test that we can get the register and then write back the - * same value. Some registers may allow other values to be written - * back, but others only allow some bits to be changed, and at least - * for ID registers set will fail if the value does not exactly match - * what was returned by get. If registers that allow other values to - * be written need to have the other values tested, then we should - * create a new set of tests for those in a new independent test - * executable. - */ - for_each_reg(i) { - uint8_t addr[2048 / 8]; - struct kvm_one_reg reg = { - .id = reg_list->reg[i], - .addr = (__u64)&addr, - }; - bool reject_reg = false; - int ret; - - ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); - if (ret) { - printf("%s: Failed to get ", config_name(c)); - print_reg(c, reg.id); - putchar('\n'); - ++failed_get; - } - - /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ - for_each_sublist(c, s) { - if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { - reject_reg = true; - ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); - if (ret != -1 || errno != EPERM) { - printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); - print_reg(c, reg.id); - putchar('\n'); - ++failed_reject; - } - break; - } - } - - if (!reject_reg) { - ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); - if (ret) { - printf("%s: Failed to set ", config_name(c)); - print_reg(c, reg.id); - putchar('\n'); - ++failed_set; - } - } - } - - for_each_sublist(c, s) - blessed_n += s->regs_n; - blessed_reg = calloc(blessed_n, sizeof(__u64)); - - n = 0; - for_each_sublist(c, s) { - for (i = 0; i < s->regs_n; ++i) - blessed_reg[n++] = s->regs[i]; - } - - for_each_new_reg(i) - ++new_regs; - - for_each_missing_reg(i) - ++missing_regs; - - if (new_regs || missing_regs) { - n = 0; - for_each_reg_filtered(i) - ++n; - - printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); - printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", - config_name(c), reg_list->n, reg_list->n - n); - } - - if (new_regs) { - printf("\n%s: There are %d new registers.\n" - "Consider adding them to the blessed reg " - "list with the following lines:\n\n", config_name(c), new_regs); - for_each_new_reg(i) - print_reg(c, reg_list->reg[i]); - putchar('\n'); - } - - if (missing_regs) { - printf("\n%s: There are %d missing registers.\n" - "The following lines are missing registers:\n\n", config_name(c), missing_regs); - for_each_missing_reg(i) - print_reg(c, blessed_reg[i]); - putchar('\n'); - } - - TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, - "%s: There are %d missing registers; " - "%d registers failed get; %d registers failed set; %d registers failed reject", - config_name(c), missing_regs, failed_get, failed_set, failed_reject); - - pr_info("%s: PASS\n", config_name(c)); - blessed_n = 0; - free(blessed_reg); - free(reg_list); - kvm_vm_free(vm); -} - -static void help(void) -{ - struct vcpu_config *c; - int i; - - printf( - "\n" - "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n" - " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" - " '<selection>' may be\n"); - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - printf( - " '%s'\n", config_name(c)); - } - - printf( - "\n" - " --list Print the register list rather than test it (requires --config)\n" - " --list-filtered Print registers that would normally be filtered out (requires --config)\n" - " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n" - "\n" - ); -} - -static struct vcpu_config *parse_config(const char *config) -{ - struct vcpu_config *c; - int i; - - if (config[8] != '=') - help(), exit(1); - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - if (strcmp(config_name(c), &config[9]) == 0) - break; - } - - if (i == vcpu_configs_n) - help(), exit(1); - - return c; -} - -int main(int ac, char **av) -{ - struct vcpu_config *c, *sel = NULL; - int i, ret = 0; - pid_t pid; - - for (i = 1; i < ac; ++i) { - if (strcmp(av[i], "--core-reg-fixup") == 0) - fixup_core_regs = true; - else if (strncmp(av[i], "--config", 8) == 0) - sel = parse_config(av[i]); - else if (strcmp(av[i], "--list") == 0) - print_list = true; - else if (strcmp(av[i], "--list-filtered") == 0) - print_filtered = true; - else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) - help(), exit(0); - else - help(), exit(1); - } - - if (print_list || print_filtered) { - /* - * We only want to print the register list of a single config. - */ - if (!sel) - help(), exit(1); - } - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - if (sel && c != sel) - continue; - - pid = fork(); - - if (!pid) { - run_test(c); - exit(0); - } else { - int wstatus; - pid_t wpid = wait(&wstatus); - TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); - if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) - ret = KSFT_FAIL; - } - } - - return ret; -} - -/* - * The current blessed list was primed with the output of kernel version + * The original blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. + * (The --core-reg-fixup option and it's fixup function have been removed + * from the test, as it's unlikely to use this type of test on a kernel + * older than v5.2.) * * The blessed list is up to date with kernel version v6.4 (or so we hope) */ @@ -752,9 +349,21 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 2), + KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ + + /* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ + KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0), + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_TIMER_CNT, + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ @@ -888,6 +497,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ @@ -900,6 +510,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ @@ -907,6 +518,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ @@ -1103,6 +715,67 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + SYS_REG(SCTLR_EL2), + SYS_REG(ACTLR_EL2), + SYS_REG(SCTLR2_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), +}; + +static __u64 el2_e2h0_regs[] = { + /* Empty */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -1129,48 +802,124 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } +#define EL2_E2H0_SUBLIST \ + EL2_SUBLIST, \ + { \ + .name = "EL2 E2H0", \ + .capability = KVM_CAP_ARM_EL2_E2H0, \ + .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \ + .regs = el2_e2h0_regs, \ + .regs_n = ARRAY_SIZE(el2_e2h0_regs), \ + } + +static struct vcpu_reg_list vregs_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list sve_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list pauth_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; +static struct vcpu_reg_list pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; -static struct vcpu_config vregs_config = { +static struct vcpu_reg_list el2_vregs_config = { .sublists = { BASE_SUBLIST, + EL2_SUBLIST, VREGS_SUBLIST, {0}, }, }; -static struct vcpu_config vregs_pmu_config = { + +static struct vcpu_reg_list el2_vregs_pmu_config = { .sublists = { BASE_SUBLIST, + EL2_SUBLIST, VREGS_SUBLIST, PMU_SUBLIST, {0}, }, }; -static struct vcpu_config sve_config = { + +static struct vcpu_reg_list el2_sve_config = { .sublists = { BASE_SUBLIST, + EL2_SUBLIST, SVE_SUBLIST, {0}, }, }; -static struct vcpu_config sve_pmu_config = { + +static struct vcpu_reg_list el2_sve_pmu_config = { .sublists = { BASE_SUBLIST, + EL2_SUBLIST, SVE_SUBLIST, PMU_SUBLIST, {0}, }, }; -static struct vcpu_config pauth_config = { + +static struct vcpu_reg_list el2_pauth_config = { .sublists = { BASE_SUBLIST, + EL2_SUBLIST, VREGS_SUBLIST, PAUTH_SUBLIST, {0}, }, }; -static struct vcpu_config pauth_pmu_config = { + +static struct vcpu_reg_list el2_pauth_pmu_config = { .sublists = { BASE_SUBLIST, + EL2_SUBLIST, VREGS_SUBLIST, PAUTH_SUBLIST, PMU_SUBLIST, @@ -1178,12 +927,85 @@ static struct vcpu_config pauth_pmu_config = { }, }; -static struct vcpu_config *vcpu_configs[] = { +static struct vcpu_reg_list el2_e2h0_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, &sve_config, &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + &el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, + + &el2_e2h0_vregs_config, + &el2_e2h0_vregs_pmu_config, + &el2_e2h0_sve_config, + &el2_e2h0_sve_pmu_config, + &el2_e2h0_pauth_config, + &el2_e2h0_pauth_pmu_config, }; -static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c new file mode 100644 index 000000000000..bbe6862c6ab1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/hello_el2.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1 + * + * Copyright 2025 Google LLC + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +static void guest_code(void) +{ + u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1); + u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1); + u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4); + + GUEST_ASSERT_EQ(get_current_el(), 2); + GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H); + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1), + ID_AA64MMFR1_EL1_VH_IMP); + + /* + * Traps of the complete ID register space are IMPDEF without FEAT_FGT, + * which is really annoying to deal with in KVM describing E2H as RES1. + * + * If the implementation doesn't honor the trap then expect the register + * to return all zeros. + */ + if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP) + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0), + ID_AA64MMFR0_EL1_FGT_NI); + else + GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1); + + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c new file mode 100644 index 000000000000..3826772fd470 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/host_sve.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls. + * + * Copyright 2025 Arm, Ltd + */ + +#include <errno.h> +#include <signal.h> +#include <sys/auxv.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "ucall_common.h" + +static void guest_code(void) +{ + for (int i = 0; i < 10; i++) { + GUEST_UCALL_NONE(); + } + + GUEST_DONE(); +} + +void handle_sigill(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *uctx = ctx; + + printf(" < host signal %d >\n", sig); + + /* + * Skip the UDF + */ + uctx->uc_mcontext.pc += 4; +} + +void register_sigill_handler(void) +{ + struct sigaction sa = { + .sa_sigaction = handle_sigill, + .sa_flags = SA_SIGINFO, + }; + sigaction(SIGILL, &sa, NULL); +} + +static void do_sve_roundtrip(void) +{ + unsigned long before, after; + + /* + * Set all bits in a predicate register, force a save/restore via a + * SIGILL (which handle_sigill() will recover from), then report + * whether the value has changed. + */ + asm volatile( + " .arch_extension sve\n" + " ptrue p0.B\n" + " cntp %[before], p0, p0.B\n" + " udf #0\n" + " cntp %[after], p0, p0.B\n" + : [before] "=r" (before), + [after] "=r" (after) + : + : "p0" + ); + + if (before != after) { + TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n", + before, after); + } else { + printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n", + before, after); + } +} + +static void test_run(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + register_sigill_handler(); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + do_sve_roundtrip(); + + while (!guest_done) { + + printf("Running VCPU...\n"); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_NONE: + do_sve_roundtrip(); + do_sve_roundtrip(); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + /* + * This is testing the host environment, we don't care about + * guest SVE support. + */ + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + printf("SVE not supported\n"); + return KSFT_SKIP; + } + + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index bef1499fb465..bf038a0371f4 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -8,7 +8,6 @@ * hypercalls are properly masked or unmasked to the guest when disabled or * enabled from the KVM userspace, respectively. */ - #include <errno.h> #include <linux/arm-smccc.h> #include <asm/kvm.h> @@ -22,22 +21,31 @@ #define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 #define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 #define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1 + +#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX) +#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0 struct kvm_fw_reg_info { uint64_t reg; /* Register definition */ uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ + uint64_t reset_val; /* Reset value for the register */ }; #define FW_REG_INFO(r) \ { \ .reg = r, \ .max_feat_bit = r##_BIT_MAX, \ + .reset_val = r##_RESET_VAL \ } static const struct kvm_fw_reg_info fw_reg_info[] = { FW_REG_INFO(KVM_REG_ARM_STD_BMAP), FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), + FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2), }; enum test_stage { @@ -100,20 +108,22 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { memset(&res, 0, sizeof(res)); - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: case TEST_STAGE_HVC_IFACE_FALSE_INFO: - GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED, - res.a0, hc_info->func_id, hc_info->arg1); + __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); break; case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: - GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED, - res.a0, hc_info->func_id, hc_info->arg1); + __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); break; default: - GUEST_ASSERT_1(0, stage); + GUEST_FAIL("Unexpected stage = %u", stage); } } } @@ -132,7 +142,7 @@ static void guest_code(void) guest_test_hvc(false_hvc_info); break; default: - GUEST_ASSERT_1(0, stage); + GUEST_FAIL("Unexpected stage = %u", stage); } GUEST_SYNC(stage); @@ -170,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + uint64_t set_val; + + /* First 'read' should be the reset value for the reg */ + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == reg_info->reset_val, + "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", + reg_info->reg, reg_info->reset_val, val); - /* First 'read' should be an upper limit of the features supported */ - vcpu_get_reg(vcpu, reg_info->reg, &val); - TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n", - reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); + if (reg_info->reset_val) + set_val = 0; + else + set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit); - /* Test a 'write' by disabling all the features of the register map */ - ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val); TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d\n", - reg_info->reg, errno); + "Failed to %s all the features of reg: 0x%lx; ret: %d", + (set_val ? "set" : "clear"), reg_info->reg, errno); - vcpu_get_reg(vcpu, reg_info->reg, &val); - TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); + val = vcpu_get_reg(vcpu, reg_info->reg); + TEST_ASSERT(val == set_val, + "Expected all the features to be %s for reg: 0x%lx", + (set_val ? "set" : "cleared"), reg_info->reg); + + /* + * If the reg has been set, clear it as test_fw_regs_after_vm_start() + * expects it to be cleared. + */ + if (set_val) { + ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + TEST_ASSERT(ret == 0, + "Failed to clear all the features of reg: 0x%lx; ret: %d", + reg_info->reg, errno); + } /* * Test enabling a feature that's not supported. @@ -194,7 +221,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) if (reg_info->max_feat_bit < 63) { ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); TEST_ASSERT(ret != 0 && errno == EINVAL, - "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx", errno, reg_info->reg); } } @@ -213,9 +240,9 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) * Before starting the VM, the test clears all the bits. * Check if that's still the case. */ - vcpu_get_reg(vcpu, reg_info->reg, &val); + val = vcpu_get_reg(vcpu, reg_info->reg); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* @@ -225,7 +252,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); TEST_ASSERT(ret != 0 && errno == EBUSY, - "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx", errno, reg_info->reg); } } @@ -264,7 +291,7 @@ static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) case TEST_STAGE_HVC_IFACE_FALSE_INFO: break; default: - TEST_FAIL("Unknown test stage: %d\n", prev_stage); + TEST_FAIL("Unknown test stage: %d", prev_stage); } } @@ -290,13 +317,10 @@ static void test_run(void) guest_done = true; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u", - GUEST_ASSERT_ARG(uc, 0), - GUEST_ASSERT_ARG(uc, 1), - GUEST_ASSERT_ARG(uc, 2), stage); + REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } } diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..b5be9133535a --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include <errno.h> +#include <linux/arm-smccc.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c new file mode 100644 index 000000000000..152c34776981 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3 system, not configuring GICv3 correctly +// results in all of the sysregs generating an UNDEF exception. + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while(0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while(0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while(0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while(0) + +static void guest_code(void) +{ + uint64_t val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC, + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existance, as all we want to see is a failure. + */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! */ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_gicv3(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GICv3 */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t pfr0; + + test_disable_default_vgic(); + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), + "GICv3 not supported."); + kvm_vm_free(vm); + + test_guest_no_gicv3(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index df10f1ffa20d..4ccbd389d133 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -7,8 +7,6 @@ * hugetlbfs with a hole). It checks that the expected handling method is * called (e.g., uffd faults with the right address and write/read flag). */ - -#define _GNU_SOURCE #include <linux/bitmap.h> #include <fcntl.h> #include <test_util.h> @@ -97,14 +95,14 @@ static bool guest_check_lse(void) uint64_t isar0 = read_sysreg(id_aa64isar0_el1); uint64_t atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -136,8 +134,8 @@ static void guest_at(void) uint64_t par; asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); - par = read_sysreg(par_el1); isb(); + par = read_sysreg(par_el1); /* Bit 1 indicates whether the AT was successful */ GUEST_ASSERT_EQ(par & 1, 0); @@ -197,11 +195,11 @@ static bool guest_set_ha(void) uint64_t hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); @@ -293,12 +291,12 @@ static void guest_code(struct test_desc *test) static void no_dabt_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(false, read_sysreg(far_el1)); + GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1)); } static void no_iabt_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(false, regs->pc); + GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc); } static struct uffd_args { @@ -318,7 +316,7 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING"); - ASSERT_EQ(addr, (uint64_t)args->hva); + TEST_ASSERT_EQ(addr, (uint64_t)args->hva); pr_debug("uffd fault: addr=%p write=%d\n", (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); @@ -376,14 +374,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p, *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, pt_args.hva, pt_args.paging_size, - test->uffd_pt_handler); + 1, test->uffd_pt_handler); *data_uffd = NULL; if (test->uffd_data_handler) *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, data_args.hva, data_args.paging_size, - test->uffd_data_handler); + 1, test->uffd_data_handler); } static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, @@ -415,10 +413,10 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, if (fd != -1) { ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, paging_size); - TEST_ASSERT(ret == 0, "fallocate failed\n"); + TEST_ASSERT(ret == 0, "fallocate failed"); } else { ret = madvise(hva, paging_size, MADV_DONTNEED); - TEST_ASSERT(ret == 0, "madvise failed\n"); + TEST_ASSERT(ret == 0, "madvise failed"); } return true; @@ -432,7 +430,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); hva = (void *)region->region.userspace_addr; - ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); memcpy(hva, run->mmio.data, run->mmio.len); events.mmio_exits += 1; @@ -502,7 +500,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) void fail_vcpu_run_no_handler(int ret) { - TEST_FAIL("Unexpected vcpu run failure\n"); + TEST_FAIL("Unexpected vcpu run failure"); } void fail_vcpu_run_mmio_no_syndrome_handler(int ret) @@ -546,9 +544,9 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_DABT, no_dabt_handler); + ESR_ELx_EC_DABT_CUR, no_dabt_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_IABT, no_iabt_handler); + ESR_ELx_EC_IABT_CUR, no_iabt_handler); } static void setup_gva_maps(struct kvm_vm *vm) @@ -631,9 +629,9 @@ static void setup_default_handlers(struct test_desc *test) static void check_event_counts(struct test_desc *test) { - ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); - ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); - ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); + TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); } static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) @@ -679,7 +677,7 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, } break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; @@ -706,7 +704,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) print_test_banner(mode, p); - vm = ____vm_create(mode); + vm = ____vm_create(VM_SHAPE(mode)); setup_memslots(vm, p); kvm_vm_elf_load(vm, program_invocation_name); setup_ucall(vm); @@ -843,6 +841,7 @@ static void help(char *name) .name = SCAT2(ro_memslot_no_syndrome, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ .expected_events = { .fail_vcpu_runs = 1 }, \ @@ -866,6 +865,7 @@ static void help(char *name) .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .guest_test_check = { _test_check }, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ @@ -895,6 +895,7 @@ static void help(char *name) .data_memslot_flags = KVM_MEM_READONLY, \ .pt_memslot_flags = KVM_MEM_READONLY, \ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .uffd_data_handler = _uffd_data_handler, \ .uffd_pt_handler = uffd_pt_handler, \ diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index 9b004905d1d3..98e49f710aef 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -11,9 +11,9 @@ * KVM_SYSTEM_EVENT_SUSPEND UAPI. */ -#define _GNU_SOURCE - +#include <linux/kernel.h> #include <linux/psci.h> +#include <asm/cputype.h> #include "kvm_util.h" #include "processor.h" @@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 0, 0, 0, 0, &res); return res.a0; @@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 0, 0, 0, 0, 0, &res); return res.a0; @@ -48,17 +48,26 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 0, 0, 0, 0, 0, &res); return res.a0; } +static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +{ + struct arm_smccc_res res; + + do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -80,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, vm = vm_create(2); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *source = aarch64_vcpu_add(vm, 0, &init, guest_code); *target = aarch64_vcpu_add(vm, 1, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -102,8 +112,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { uint64_t obs_pc, obs_x0; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); + obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, "unexpected target cpu pc: %lx (expected: %lx)", @@ -143,7 +153,7 @@ static void host_test_cpu_on(void) */ vcpu_power_off(target); - vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); enter_guest(source); @@ -188,11 +198,94 @@ static void host_test_system_suspend(void) kvm_vm_free(vm); } +static void guest_test_system_off2(void) +{ + uint64_t ret; + + /* assert that SYSTEM_OFF2 is discoverable */ + GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + + /* With non-zero 'cookie' field, it should fail */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + /* + * This would normally never return, so KVM sets the return value + * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so + * that it can test both values for HIBERNATE_OFF. + */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + /* + * Revision F.b of the PSCI v1.3 specification documents zero as an + * alias for HIBERNATE_OFF, since that's the value used in earlier + * revisions of the spec and some implementations in the field. + */ + ret = psci_system_off2(0, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + ret = psci_system_off2(0, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + GUEST_DONE(); +} + +static void host_test_system_off2(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_mp_state mps; + uint64_t psci_version = 0; + int nr_shutdowns = 0; + struct kvm_run *run; + struct ucall uc; + + setup_vm(guest_test_system_off2, &source, &target); + + psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION); + + TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), + "Unexpected PSCI version %lu.%lu", + PSCI_VERSION_MAJOR(psci_version), + PSCI_VERSION_MINOR(psci_version)); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); + TEST_ASSERT(run->system_event.ndata >= 1, + "Unexpected amount of system event data: %u (expected, >= 1)", + run->system_event.ndata); + TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, + "PSCI_OFF2 flag not set. Flags %llu (expected %llu)", + run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); + + nr_shutdowns++; + + /* Restart the vCPU */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_mp_state_set(source, &mps); + + enter_guest(source); + } + + TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); + TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); +} + int main(void) { TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); host_test_cpu_on(); host_test_system_suspend(); + host_test_system_off2(); return 0; } diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c new file mode 100644 index 000000000000..573dd790aeb8 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails + * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER. + * + * After reaching userspace with expected arm_sea info, also test userspace + * injecting a synchronous external data abort into the guest. + * + * This test utilizes EINJ to generate a REAL synchronous external data + * abort by consuming a recoverable uncorrectable memory error. Therefore + * the device under test must support EINJ in both firmware and host kernel, + * including the notrigger feature. Otherwise the test will be skipped. + * The under-test platform's APEI should be unable to claim SEA. Otherwise + * the test will also be skipped. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "guest_modes.h" + +#define PAGE_PRESENT (1ULL << 63) +#define PAGE_PHYSICAL 0x007fffffffffffffULL +#define PAGE_ADDR_MASK (~(0xfffULL)) + +/* Group ISV and ISS[23:14]. */ +#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \ + (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \ + (ESR_ELx_SF) | (ESR_ELx_AR)) + +#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type" +#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1" +#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2" +#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags" +#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger" +#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject" +/* Memory Uncorrectable non-fatal. */ +#define ERROR_TYPE_MEMORY_UER 0x10 +/* Memory address and mask valid (param1 and param2). */ +#define MASK_MEMORY_UER 0b10 + +/* Guest virtual address region = [2G, 3G). */ +#define START_GVA 0x80000000UL +#define VM_MEM_SIZE 0x40000000UL +/* Note: EINJ_OFFSET must < VM_MEM_SIZE. */ +#define EINJ_OFFSET 0x01234badUL +#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET)) + +static vm_paddr_t einj_gpa; +static void *einj_hva; +static uint64_t einj_hpa; +static bool far_invalid; + +static uint64_t translate_to_host_paddr(unsigned long vaddr) +{ + uint64_t pinfo; + int64_t offset = vaddr / getpagesize() * sizeof(pinfo); + int fd; + uint64_t page_addr; + uint64_t paddr; + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) + ksft_exit_fail_perror("Failed to open /proc/self/pagemap"); + if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) { + close(fd); + ksft_exit_fail_perror("Failed to read /proc/self/pagemap"); + } + + close(fd); + + if ((pinfo & PAGE_PRESENT) == 0) + ksft_exit_fail_perror("Page not present"); + + page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT; + paddr = page_addr + (vaddr & (getpagesize() - 1)); + return paddr; +} + +static void write_einj_entry(const char *einj_path, uint64_t val) +{ + char cmd[256] = {0}; + FILE *cmdfile = NULL; + + sprintf(cmd, "echo %#lx > %s", val, einj_path); + cmdfile = popen(cmd, "r"); + + if (pclose(cmdfile) == 0) + ksft_print_msg("echo %#lx > %s - done\n", val, einj_path); + else + ksft_exit_fail_perror("Failed to write EINJ entry"); +} + +static void inject_uer(uint64_t paddr) +{ + if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) + ksft_test_result_skip("EINJ table no available in firmware"); + + if (access(EINJ_ETYPE, R_OK | W_OK) == -1) + ksft_test_result_skip("EINJ module probably not loaded?"); + + write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER); + write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER); + write_einj_entry(EINJ_ADDR, paddr); + write_einj_entry(EINJ_MASK, ~0x0UL); + write_einj_entry(EINJ_NOTRIGGER, 1); + write_einj_entry(EINJ_DOIT, 1); +} + +/* + * When host APEI successfully claims the SEA caused by guest_code, kernel + * will send SIGBUS signal with BUS_MCEERR_AR to test thread. + * + * We set up this SIGBUS handler to skip the test for that case. + */ +static void sigbus_signal_handler(int sig, siginfo_t *si, void *v) +{ + ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig); + ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n", + si->si_signo, si->si_errno, si->si_code, si->si_addr); + if (si->si_code == BUS_MCEERR_AR) + ksft_test_result_skip("SEA is claimed by host APEI\n"); + else + ksft_test_result_fail("Exit with signal unhandled\n"); + + exit(0); +} + +static void setup_sigbus_handler(void) +{ + struct sigaction act; + + memset(&act, 0, sizeof(act)); + sigemptyset(&act.sa_mask); + act.sa_sigaction = sigbus_signal_handler; + act.sa_flags = SA_SIGINFO; + TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0, + "Failed to setup SIGBUS handler"); +} + +static void guest_code(void) +{ + uint64_t guest_data; + + /* Consumes error will cause a SEA. */ + guest_data = *(uint64_t *)EINJ_GVA; + + GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n", + EINJ_GVA, guest_data); +} + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + u64 far = read_sysreg(far_el1); + bool expect_far_invalid = far_invalid; + + GUEST_PRINTF("Handling Guest SEA\n"); + GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + if (expect_far_invalid) { + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV); + GUEST_PRINTF("Guest observed garbage value in FAR\n"); + } else { + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0); + GUEST_ASSERT_EQ(far, EINJ_GVA); + } + + GUEST_DONE(); +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + bool guest_done = false; + struct kvm_run *run = vcpu->run; + u64 esr; + + /* Resume the vCPU after error injection to consume the error. */ + vcpu_run(vcpu); + + ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n", + exit_reason_str(run->exit_reason)); + ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n", + run->arm_sea.esr, run->arm_sea.flags); + ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n", + run->arm_sea.gva, run->arm_sea.gpa); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA); + + esr = run->arm_sea.esr; + TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW); + TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0); + TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0); + TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0); + + if (!(esr & ESR_ELx_FnV)) { + ksft_print_msg("Expect gva to match given FnV bit is 0\n"); + TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA); + } + + if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) { + ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n"); + TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK); + } + + far_invalid = esr & ESR_ELx_FnV; + + /* Inject a SEA into guest and expect handled in SEA handler. */ + vcpu_inject_sea(vcpu); + + /* Expect the guest to reach GUEST_DONE gracefully. */ + do { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_PRINTF: + ksft_print_msg("From guest: %s", uc.buffer); + break; + case UCALL_DONE: + ksft_print_msg("Guest done gracefully!\n"); + guest_done = 1; + break; + case UCALL_ABORT: + ksft_print_msg("Guest aborted!\n"); + guest_done = 1; + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + } + } while (!guest_done); +} + +static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) +{ + size_t backing_page_size; + size_t guest_page_size; + size_t alignment; + uint64_t num_guest_pages; + vm_paddr_t start_gpa; + enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB; + struct kvm_vm *vm; + + backing_page_size = get_backing_src_pagesz(src_type); + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + alignment = max(backing_page_size, guest_page_size); + num_guest_pages = VM_MEM_SIZE / guest_page_size; + + vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + + vm_install_sync_handler(vm, + /*vector=*/VECTOR_SYNC_CURRENT, + /*ec=*/ESR_ELx_EC_DABT_CUR, + /*handler=*/expect_sea_handler); + + start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size; + start_gpa = align_down(start_gpa, alignment); + + vm_userspace_mem_region_add( + /*vm=*/vm, + /*src_type=*/src_type, + /*guest_paddr=*/start_gpa, + /*slot=*/1, + /*npages=*/num_guest_pages, + /*flags=*/0); + + virt_map(vm, START_GVA, start_gpa, num_guest_pages); + + ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n", + num_guest_pages, START_GVA, start_gpa); + return vm; +} + +static void vm_inject_memory_uer(struct kvm_vm *vm) +{ + uint64_t guest_data; + + einj_gpa = addr_gva2gpa(vm, EINJ_GVA); + einj_hva = addr_gva2hva(vm, EINJ_GVA); + + /* Populate certain data before injecting UER. */ + *(uint64_t *)einj_hva = 0xBAADCAFE; + guest_data = *(uint64_t *)einj_hva; + ksft_print_msg("Before EINJect: data=%#lx\n", + guest_data); + + einj_hpa = translate_to_host_paddr((unsigned long)einj_hva); + + ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n", + EINJ_GVA, einj_gpa, einj_hva, einj_hpa); + + inject_uer(einj_hpa); + ksft_print_msg("Memory UER EINJected\n"); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER)); + + setup_sigbus_handler(); + + vm = vm_create_with_sea_handler(&vcpu); + vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0); + vm_inject_memory_uer(vm); + run_vm(vm, vcpu); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c new file mode 100644 index 000000000000..c4815d365816 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -0,0 +1,813 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * set_id_regs - Test for setting ID register from usersapce. + * + * Copyright (c) 2023 Google LLC. + * + * + * Test that KVM supports setting ID registers from userspace and handles the + * feature set correctly. + */ + +#include <stdint.h> +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include <linux/bitfield.h> + +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE, /* Bigger value is safe */ + FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */ + FTR_END, /* Mark the last ftr bits */ +}; + +#define FTR_SIGNED true /* Value should be treated as signed */ +#define FTR_UNSIGNED false /* Value should be treated as unsigned */ + +struct reg_ftr_bits { + char *name; + bool sign; + enum ftr_type type; + uint8_t shift; + uint64_t mask; + /* + * For FTR_EXACT, safe_val is used as the exact safe value. + * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value. + */ + int64_t safe_val; +}; + +struct test_feature_reg { + uint32_t reg; + const struct reg_ftr_bits *ftr_bits; +}; + +#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \ + { \ + .name = #NAME, \ + .sign = SIGNED, \ + .type = TYPE, \ + .shift = SHIFT, \ + .mask = MASK, \ + .safe_val = SAFE_VAL, \ + } + +#define REG_FTR_BITS(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val) + +#define S_REG_FTR_BITS(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val) + +#define REG_FTR_END \ + { \ + .type = FTR_END, \ + } + +static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0), + REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0), + REG_FTR_END, +}; + +#define TEST_REG(id, table) \ + { \ + .reg = id, \ + .ftr_bits = &((table)[0]), \ + } + +static struct test_feature_reg test_regs[] = { + TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1), + TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1), + TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), + TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), + TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1), + TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), + TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), + TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), + TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), + TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), + TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), +}; + +#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0); + +static void guest_code(void) +{ + GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1); + GUEST_REG_SYNC(SYS_ID_DFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_MPIDR_EL1); + GUEST_REG_SYNC(SYS_CLIDR_EL1); + GUEST_REG_SYNC(SYS_CTR_EL0); + GUEST_REG_SYNC(SYS_MIDR_EL1); + GUEST_REG_SYNC(SYS_REVIDR_EL1); + GUEST_REG_SYNC(SYS_AIDR_EL1); + + GUEST_DONE(); +} + +/* Return a safe value to a given ftr_bits an ftr value */ +uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +{ + uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); + + if (ftr_bits->sign == FTR_UNSIGNED) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = ftr_bits->safe_val; + break; + case FTR_LOWER_SAFE: + if (ftr > ftr_bits->safe_val) + ftr--; + break; + case FTR_HIGHER_SAFE: + if (ftr < ftr_max) + ftr++; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == ftr_max) + ftr = 0; + else if (ftr != 0) + ftr++; + break; + default: + break; + } + } else if (ftr != ftr_max) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = ftr_bits->safe_val; + break; + case FTR_LOWER_SAFE: + if (ftr > ftr_bits->safe_val) + ftr--; + break; + case FTR_HIGHER_SAFE: + if (ftr < ftr_max - 1) + ftr++; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr != 0 && ftr != ftr_max - 1) + ftr++; + break; + default: + break; + } + } + + return ftr; +} + +/* Return an invalid value to a given ftr_bits an ftr value */ +uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +{ + uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); + + if (ftr_bits->sign == FTR_UNSIGNED) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + break; + case FTR_LOWER_SAFE: + ftr++; + break; + case FTR_HIGHER_SAFE: + ftr--; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == 0) + ftr = ftr_max; + else + ftr--; + break; + default: + break; + } + } else if (ftr != ftr_max) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + break; + case FTR_LOWER_SAFE: + ftr++; + break; + case FTR_HIGHER_SAFE: + ftr--; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == 0) + ftr = ftr_max - 1; + else + ftr--; + break; + default: + break; + } + } else { + ftr = 0; + } + + return ftr; +} + +static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) +{ + uint8_t shift = ftr_bits->shift; + uint64_t mask = ftr_bits->mask; + uint64_t val, new_val, ftr; + + val = vcpu_get_reg(vcpu, reg); + ftr = (val & mask) >> shift; + + ftr = get_safe_value(ftr_bits, ftr); + + ftr <<= shift; + val &= ~mask; + val |= ftr; + + vcpu_set_reg(vcpu, reg, val); + new_val = vcpu_get_reg(vcpu, reg); + TEST_ASSERT_EQ(new_val, val); + + return new_val; +} + +static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) +{ + uint8_t shift = ftr_bits->shift; + uint64_t mask = ftr_bits->mask; + uint64_t val, old_val, ftr; + int r; + + val = vcpu_get_reg(vcpu, reg); + ftr = (val & mask) >> shift; + + ftr = get_invalid_value(ftr_bits, ftr); + + old_val = val; + ftr <<= shift; + val &= ~mask; + val |= ftr; + + r = __vcpu_set_reg(vcpu, reg, val); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); + + val = vcpu_get_reg(vcpu, reg); + TEST_ASSERT_EQ(val, old_val); +} + +static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + +#define encoding_to_range_idx(encoding) \ + KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ + sys_reg_CRn(encoding), sys_reg_CRm(encoding), \ + sys_reg_Op2(encoding)) + + +static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + int ret; + + /* KVM should return error when reserved field is not zero */ + range.reserved[0] = 1; + ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + TEST_ASSERT(ret, "KVM doesn't check invalid parameters."); + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { + const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; + uint32_t reg_id = test_regs[i].reg; + uint64_t reg = KVM_ARM64_SYS_REG(reg_id); + int idx; + + /* Get the index to masks array for the idreg */ + idx = encoding_to_range_idx(reg_id); + + for (int j = 0; ftr_bits[j].type != FTR_END; j++) { + /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */ + if (aarch64_only && sys_reg_CRm(reg_id) < 4) { + ksft_test_result_skip("%s on AARCH64 only system\n", + ftr_bits[j].name); + continue; + } + + /* Make sure the feature field is writable */ + TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask); + + test_reg_set_fail(vcpu, reg, &ftr_bits[j]); + + test_reg_vals[idx] = test_reg_set_success(vcpu, reg, + &ftr_bits[j]); + + ksft_test_result_pass("%s\n", ftr_bits[j].name); + } + } +} + +#define MPAM_IDREG_TEST 6 +static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + int idx, err; + + /* + * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, + * check that if it can be set to 1, (i.e. it is supported by the + * hardware), that it can't be set to other values. + */ + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + /* Writeable? Nothing to test! */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); + if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + /* Try to set MPAM=0. This should always be possible. */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); + + /* Try to set MPAM=1 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); + + /* Try to set MPAM=2 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); + + /* And again for ID_AA64PFR1_EL1.MPAM_frac */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + + /* Try to set MPAM_frac=0. This should always be possible. */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); + + /* Try to set MPAM_frac=1 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); + + /* Try to set MPAM_frac=2 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); +} + +#define MTE_IDREG_TEST 1 +static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + uint64_t mte; + uint64_t mte_frac; + int idx, err; + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + if (!mte) { + ksft_test_result_skip("MTE capability not supported, nothing to test\n"); + return; + } + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n"); + return; + } + + /* + * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2) + * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported + * and MTE_frac == 0 indicates it is supported. + * + * As MTE_frac was previously unconditionally read as 0, check + * that the set to 0 succeeds but does not change MTE_frac + * from unsupported (0xF) to supported (0). + * + */ + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); + if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || + mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { + ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); + return; + } + + /* Try to set MTE_frac=0. */ + val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) { + ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n"); + return; + } + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); + if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); + else + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); +} + +static void test_guest_reg_read(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_SYNC: + /* Make sure the written values are seen by guest */ + TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])], + uc.args[3]); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +/* Politely lifted from arch/arm64/include/asm/cache.h */ +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +static void test_clidr(struct kvm_vcpu *vcpu) +{ + uint64_t clidr; + int level; + + clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); + + /* find the first empty level in the cache hierarchy */ + for (level = 1; level <= 7; level++) { + if (!CLIDR_CTYPE(clidr, level)) + break; + } + + /* + * If you have a mind-boggling 7 levels of cache, congratulations, you + * get to fix this. + */ + TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy"); + + /* stick in a unified cache level */ + clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level); + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr); + test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; +} + +static void test_ctr(struct kvm_vcpu *vcpu) +{ + u64 ctr; + + ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0)); + ctr &= ~CTR_EL0_DIC_MASK; + if (ctr & CTR_EL0_IminLine_MASK) + ctr--; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr); + test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; +} + +static void test_id_reg(struct kvm_vcpu *vcpu, u32 id) +{ + u64 val; + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id)); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val); + test_reg_vals[encoding_to_range_idx(id)] = val; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + test_clidr(vcpu); + test_ctr(vcpu); + + test_id_reg(vcpu, SYS_MPIDR_EL1); + ksft_test_result_pass("%s\n", __func__); +} + +static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + test_id_reg(vcpu, SYS_MIDR_EL1); + test_id_reg(vcpu, SYS_REVIDR_EL1); + test_id_reg(vcpu, SYS_AIDR_EL1); + + ksft_test_result_pass("%s\n", __func__); +} + +static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) +{ + size_t idx = encoding_to_range_idx(encoding); + uint64_t observed; + + observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); + TEST_ASSERT_EQ(test_reg_vals[idx], observed); +} + +static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) +{ + /* + * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an + * architectural reset of the vCPU. + */ + aarch64_vcpu_setup(vcpu, NULL); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) + test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + + test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); + test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1); + + ksft_test_result_pass("%s\n", __func__); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + bool aarch64_only; + uint64_t val, el0; + int test_cnt, i, j; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); + + test_wants_mte(); + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); + vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + /* Check for AARCH64 only system */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); + aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); + + ksft_print_header(); + + test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + for (i = 0; i < ARRAY_SIZE(test_regs); i++) + for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++) + test_cnt++; + + ksft_set_plan(test_cnt); + + test_vm_ftr_id_regs(vcpu, aarch64_only); + test_vcpu_ftr_id_regs(vcpu); + test_vcpu_non_ftr_id_regs(vcpu); + test_user_set_mpam_reg(vcpu); + test_user_set_mte_reg(vcpu); + + test_guest_reg_read(vcpu); + + test_reset_preserves_id_regs(vcpu); + + kvm_vm_free(vm); + + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index f4ceae9c8925..1763b9d45400 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -22,8 +22,20 @@ enum smccc_conduit { SMC_INSN, }; +static bool test_runs_at_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + struct kvm_vcpu_init init; + + kvm_get_default_vcpu_target(vm, &init); + kvm_vm_free(vm); + + return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + #define for_each_conduit(conduit) \ - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ + conduit <= SMC_INSN; conduit++) static void guest_main(uint32_t func_id, enum smccc_conduit conduit) { @@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) struct kvm_vm *vm; vm = vm_create(1); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); /* * Enable in-kernel emulation of PSCI to ensure that calls are denied @@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -178,7 +191,7 @@ static void expect_call_denied(struct kvm_vcpu *vcpu) struct ucall uc; if (get_ucall(vcpu, &uc) != UCALL_SYNC) - TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, "Unexpected SMCCC return code: %lu", uc.args[1]); diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c index 80b74c6f152b..80b74c6f152b 100644 --- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c +++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index eef816b80993..8d6d3a4ae4db 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -4,23 +4,23 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE #include <linux/kernel.h> #include <sys/syscall.h> #include <asm/kvm.h> #include <asm/kvm_para.h> +#include <arm64/gic_v3.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -84,6 +84,18 @@ static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, return v; } +static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type) +{ + struct vm_gic v; + + v.gic_dev_type = gic_dev_type; + v.vm = vm_create_barebones(); + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + return v; +} + + static void vm_gic_destroy(struct vm_gic *v) { close(v->gic_fd); @@ -357,6 +369,40 @@ static void test_vcpus_then_vgic(uint32_t gic_dev_type) vm_gic_destroy(&v); } +#define KVM_VGIC_V2_ATTR(offset, cpu) \ + (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \ + FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu)) + +#define GIC_CPU_CTRL 0x00 + +static void test_v2_uaccess_cpuif_no_vcpus(void) +{ + struct vm_gic v; + u64 val = 0; + int ret; + + v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2); + subtest_dist_rdist(&v); + + ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0)); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + + vm_gic_destroy(&v); +} + static void test_v3_new_redist_regions(void) { struct kvm_vcpu *vcpus[NR_VCPUS]; @@ -630,6 +676,44 @@ static void test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ @@ -670,11 +754,228 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. + * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... + */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return 0; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + kvm_vm_free(vm); +} + void run_tests(uint32_t gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); test_vgic_then_vcpus(gic_dev_type); + if (VGIC_DEV_IS_V2(gic_dev_type)) + test_v2_uaccess_cpuif_no_vcpus(); + if (VGIC_DEV_IS_V3(gic_dev_type)) { test_v3_new_redist_regions(); test_v3_typer_accesses(); @@ -682,6 +983,8 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); + test_v3_nassgicap(); } } @@ -691,6 +994,8 @@ int main(int ac, char **av) int pa_bits; int cnt_impl = 0; + test_disable_default_vgic(); + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; max_phys_size = 1ULL << pa_bits; diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index 90d854e0fcff..2fb2c7939fe9 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -7,7 +7,6 @@ * host to inject a specific intid via a GUEST_SYNC call, and then checks that * it received it. */ - #include <asm/kvm.h> #include <asm/kvm_para.h> #include <sys/eventfd.h> @@ -20,9 +19,6 @@ #include "gic_v3.h" #include "vgic.h" -#define GICD_BASE_GPA 0x08000000ULL -#define GICR_BASE_GPA 0x080A0000ULL - /* * Stores the user specified args; it's passed to the guest and to every test * function. @@ -33,6 +29,7 @@ struct test_args { bool level_sensitive; /* 1 is level, 0 is edge */ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ + uint32_t shared_data; }; /* @@ -50,9 +47,6 @@ struct test_args { #define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1) #define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */ -static void *dist = (void *)GICD_BASE_GPA; -static void *redist = (void *)GICR_BASE_GPA; - /* * The kvm_inject_* utilities are used by the guest to ask the host to inject * interrupts (e.g., using the KVM_IRQ_LINE ioctl). @@ -153,7 +147,7 @@ static void reset_stats(void) static uint64_t gic_read_ap1r0(void) { - uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1); + uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); dsb(sy); return reg; @@ -161,7 +155,7 @@ static uint64_t gic_read_ap1r0(void) static void gic_write_ap1r0(uint64_t val) { - write_sysreg_s(val, SYS_ICV_AP1R0_EL1); + write_sysreg_s(val, SYS_ICC_AP1R0_EL1); isb(); } @@ -212,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, do { \ uint32_t _intid; \ _intid = gic_get_and_ack_irq(); \ - GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ + GUEST_ASSERT(_intid == IAR_SPURIOUS); \ } while (0) #define CAT_HELPER(a, b) a ## b @@ -276,13 +270,12 @@ static void guest_inject(struct test_args *args, KVM_INJECT_MULTI(cmd, first_intid, num); while (irq_handled < num) { - asm volatile("wfi\n" - "msr daifclr, #2\n" - /* handle IRQ */ - "msr daifset, #2\n" - : : : "memory"); + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); } - asm volatile("msr daifclr, #2" : : : "memory"); + local_irq_enable(); GUEST_ASSERT_EQ(irq_handled, num); for (i = first_intid; i < num + first_intid; i++) @@ -367,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void) * interrupts for the whole test. */ static void test_inject_preemption(struct test_args *args, - uint32_t first_intid, int num, - kvm_inject_cmd cmd) + uint32_t first_intid, int num, + const unsigned long *exclude, + kvm_inject_cmd cmd) { uint32_t intid, prio, step = KVM_PRIO_STEPS; int i; @@ -387,6 +381,10 @@ static void test_inject_preemption(struct test_args *args, for (i = 0; i < num; i++) { uint32_t tmp; intid = i + first_intid; + + if (exclude && test_bit(i, exclude)) + continue; + KVM_INJECT(cmd, intid); /* Each successive IRQ will preempt the previous one. */ tmp = wait_for_and_activate_irq(); @@ -398,15 +396,33 @@ static void test_inject_preemption(struct test_args *args, /* finish handling the IRQs starting with the highest priority one. */ for (i = 0; i < num; i++) { intid = num - i - 1 + first_intid; + + if (exclude && test_bit(intid - first_intid, exclude)) + continue; + gic_set_eoi(intid); - if (args->eoi_split) - gic_set_dir(intid); + } + + if (args->eoi_split) { + for (i = 0; i < num; i++) { + intid = i + first_intid; + + if (exclude && test_bit(i, exclude)) + continue; + + if (args->eoi_split) + gic_set_dir(intid); + } } local_irq_enable(); - for (i = 0; i < num; i++) + for (i = 0; i < num; i++) { + if (exclude && test_bit(i, exclude)) + continue; + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + } GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); GUEST_ASSERT_IAR_EMPTY(); @@ -444,33 +460,32 @@ static void test_injection_failure(struct test_args *args, static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) { - /* - * Test up to 4 levels of preemption. The reason is that KVM doesn't - * currently implement the ability to have more than the number-of-LRs - * number of concurrently active IRQs. The number of LRs implemented is - * IMPLEMENTATION DEFINED, however, it seems that most implement 4. - */ + /* Timer PPIs cannot be injected from userspace */ + static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) | + BIT(30 - MIN_PPI) | + BIT(28 - MIN_PPI) | + BIT(26 - MIN_PPI)); + if (f->sgi) - test_inject_preemption(args, MIN_SGI, 4, f->cmd); + test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd); if (f->ppi) - test_inject_preemption(args, MIN_PPI, 4, f->cmd); + test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd); if (f->spi) - test_inject_preemption(args, MIN_SPI, 4, f->cmd); + test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd); } static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) { - /* Test up to 4 active IRQs. Same reason as in test_preemption. */ if (f->sgi) - guest_restore_active(args, MIN_SGI, 4, f->cmd); + guest_restore_active(args, MIN_SGI, 16, f->cmd); if (f->ppi) - guest_restore_active(args, MIN_PPI, 4, f->cmd); + guest_restore_active(args, MIN_PPI, 16, f->cmd); if (f->spi) - guest_restore_active(args, MIN_SPI, 4, f->cmd); + guest_restore_active(args, MIN_SPI, 31, f->cmd); } static void guest_code(struct test_args *args) @@ -479,14 +494,14 @@ static void guest_code(struct test_args *args) bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; - gic_init(GIC_V3, 1, dist, redist); - - for (i = 0; i < nr_irqs; i++) - gic_irq_enable(i); + gic_init(GIC_V3, 1); for (i = MIN_SPI; i < nr_irqs; i++) gic_irq_set_config(i, !level_sensitive); + for (i = 0; i < nr_irqs; i++) + gic_irq_enable(i); + gic_set_eoi_split(args->eoi_split); reset_priorities(args); @@ -628,18 +643,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { @@ -650,7 +659,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) - close(fd[f]); + kvm_close(fd[f]); } /* handles the valid case: intid=0xffffffff num=1 */ @@ -765,9 +774,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); vcpu_args_set(vcpu, 1, args_gva); - gic_fd = vgic_v3_setup(vm, 1, nr_irqs, - GICD_BASE_GPA, GICR_BASE_GPA); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); + gic_fd = vgic_v3_setup(vm, 1, nr_irqs); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handlers[args.eoi_split][args.level_sensitive]); @@ -781,7 +788,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) run_guest_cmd(vcpu, gic_fd, &inject_args, &args); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; @@ -795,6 +802,221 @@ done: kvm_vm_free(vm); } +static void guest_code_asym_dir(struct test_args *args, int cpuid) +{ + gic_init(GIC_V3, 2); + + gic_set_eoi_split(1); + gic_set_priority_mask(CPU_PRIO_MASK); + + if (cpuid == 0) { + uint32_t intid; + + local_irq_disable(); + + gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO); + gic_irq_enable(MIN_SPI); + gic_irq_set_pending(MIN_SPI); + + intid = wait_for_and_activate_irq(); + GUEST_ASSERT_EQ(intid, MIN_SPI); + + gic_set_eoi(intid); + isb(); + + WRITE_ONCE(args->shared_data, MIN_SPI); + dsb(ishst); + + do { + dsb(ishld); + } while (READ_ONCE(args->shared_data) == MIN_SPI); + GUEST_ASSERT(!gic_irq_get_active(MIN_SPI)); + } else { + do { + dsb(ishld); + } while (READ_ONCE(args->shared_data) != MIN_SPI); + + gic_set_dir(MIN_SPI); + isb(); + + WRITE_ONCE(args->shared_data, 0); + dsb(ishst); + } + + GUEST_DONE(); +} + +static void guest_code_group_en(struct test_args *args, int cpuid) +{ + uint32_t intid; + + gic_init(GIC_V3, 2); + + gic_set_eoi_split(0); + gic_set_priority_mask(CPU_PRIO_MASK); + /* SGI0 is G0, which is disabled */ + gic_irq_set_group(0, 0); + + /* Configure all SGIs with decreasing priority */ + for (intid = 0; intid < MIN_PPI; intid++) { + gic_set_priority(intid, (intid + 1) * 8); + gic_irq_enable(intid); + gic_irq_set_pending(intid); + } + + /* Ack and EOI all G1 interrupts */ + for (int i = 1; i < MIN_PPI; i++) { + intid = wait_for_and_activate_irq(); + + GUEST_ASSERT(intid < MIN_PPI); + gic_set_eoi(intid); + isb(); + } + + /* + * Check that SGI0 is still pending, inactive, and that we cannot + * ack anything. + */ + GUEST_ASSERT(gic_irq_get_pending(0)); + GUEST_ASSERT(!gic_irq_get_active(0)); + GUEST_ASSERT_IAR_EMPTY(); + GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS); + + /* Open the G0 gates, and verify we can ack SGI0 */ + write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1); + isb(); + + do { + intid = read_sysreg_s(SYS_ICC_IAR0_EL1); + } while (intid == IAR_SPURIOUS); + + GUEST_ASSERT(intid == 0); + GUEST_DONE(); +} + +static void guest_code_timer_spi(struct test_args *args, int cpuid) +{ + uint32_t intid; + u64 val; + + gic_init(GIC_V3, 2); + + gic_set_eoi_split(1); + gic_set_priority_mask(CPU_PRIO_MASK); + + /* Add a pending SPI so that KVM starts trapping DIR */ + gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO); + gic_irq_set_pending(MIN_SPI + cpuid); + + /* Configure the timer with a higher priority, make it pending */ + gic_set_priority(27, IRQ_DEFAULT_PRIO - 8); + + isb(); + val = read_sysreg(cntvct_el0); + write_sysreg(val, cntv_cval_el0); + write_sysreg(1, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(gic_irq_get_pending(27)); + + /* Enable both interrupts */ + gic_irq_enable(MIN_SPI + cpuid); + gic_irq_enable(27); + + /* The timer must fire */ + intid = wait_for_and_activate_irq(); + GUEST_ASSERT(intid == 27); + + /* Check that we can deassert it */ + write_sysreg(0, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(!gic_irq_get_pending(27)); + + /* + * Priority drop, deactivation -- we expect that the host + * deactivation will have been effective + */ + gic_set_eoi(27); + gic_set_dir(27); + + GUEST_ASSERT(!gic_irq_get_active(27)); + + /* Do it one more time */ + isb(); + val = read_sysreg(cntvct_el0); + write_sysreg(val, cntv_cval_el0); + write_sysreg(1, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(gic_irq_get_pending(27)); + + /* The timer must fire again */ + intid = wait_for_and_activate_irq(); + GUEST_ASSERT(intid == 27); + + GUEST_DONE(); +} + +static void *test_vcpu_run(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct ucall uc; + + while (1) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + + return NULL; +} + +static void test_vgic_two_cpus(void *gcode) +{ + pthread_t thr[2]; + struct kvm_vcpu *vcpus[2]; + struct test_args args = {}; + struct kvm_vm *vm; + vm_vaddr_t args_gva; + int gic_fd, ret; + + vm = vm_create_with_vcpus(2, gcode, vcpus); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpus[0]); + vcpu_init_descriptor_tables(vcpus[1]); + + /* Setup the guest args page (so it gets the args). */ + args_gva = vm_vaddr_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vcpus[0], 2, args_gva, 0); + vcpu_args_set(vcpus[1], 2, args_gva, 1); + + gic_fd = vgic_v3_setup(vm, 2, 64); + + ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]); + if (ret) + TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret); + ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]); + if (ret) + TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret); + + pthread_join(thr[0], NULL); + pthread_join(thr[1], NULL); + + close(gic_fd); + kvm_vm_free(vm); +} + static void help(const char *name) { printf( @@ -817,6 +1039,9 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; + TEST_REQUIRE(kvm_supports_vgic_v3()); + test_disable_default_vgic(); + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': @@ -848,6 +1073,9 @@ int main(int argc, char **argv) test_vgic(nr_irqs, false /* level */, true /* eoi_split */); test_vgic(nr_irqs, true /* level */, false /* eoi_split */); test_vgic(nr_irqs, true /* level */, true /* eoi_split */); + test_vgic_two_cpus(guest_code_asym_dir); + test_vgic_two_cpus(guest_code_group_en); + test_vgic_two_cpus(guest_code_timer_spi); } else { test_vgic(nr_irqs, level_sensitive, eoi_split); } diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c new file mode 100644 index 000000000000..e857a605f577 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic_lpi_stress - Stress test for KVM's ITS emulation + * + * Copyright (c) 2024 Google LLC + */ + +#include <linux/sizes.h> +#include <pthread.h> +#include <stdatomic.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" +#include "gic.h" +#include "gic_v3.h" +#include "gic_v3_its.h" +#include "processor.h" +#include "ucall.h" +#include "vgic.h" + +#define TEST_MEMSLOT_INDEX 1 + +#define GIC_LPI_OFFSET 8192 + +static size_t nr_iterations = 1000; +static vm_paddr_t gpa_base; + +static struct kvm_vm *vm; +static struct kvm_vcpu **vcpus; +static int its_fd; + +static struct test_data { + bool request_vcpus_stop; + u32 nr_cpus; + u32 nr_devices; + u32 nr_event_ids; + + vm_paddr_t device_table; + vm_paddr_t collection_table; + vm_paddr_t cmdq_base; + void *cmdq_base_va; + vm_paddr_t itt_tables; + + vm_paddr_t lpi_prop_table; + vm_paddr_t lpi_pend_tables; +} test_data = { + .nr_cpus = 1, + .nr_devices = 1, + .nr_event_ids = 16, +}; + +static void guest_irq_handler(struct ex_regs *regs) +{ + u32 intid = gic_get_and_ack_irq(); + + if (intid == IAR_SPURIOUS) + return; + + GUEST_ASSERT(intid >= GIC_LPI_OFFSET); + gic_set_eoi(intid); +} + +static void guest_setup_its_mappings(void) +{ + u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET; + u32 nr_events = test_data.nr_event_ids; + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + + for (coll_id = 0; coll_id < nr_cpus; coll_id++) + its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true); + + /* Round-robin the LPIs to all of the vCPUs in the VM */ + coll_id = 0; + for (device_id = 0; device_id < nr_devices; device_id++) { + vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); + + its_send_mapd_cmd(test_data.cmdq_base_va, device_id, + itt_base, SZ_64K, true); + + for (event_id = 0; event_id < nr_events; event_id++) { + its_send_mapti_cmd(test_data.cmdq_base_va, device_id, + event_id, coll_id, intid++); + + coll_id = (coll_id + 1) % test_data.nr_cpus; + } + } +} + +static void guest_invalidate_all_rdists(void) +{ + int i; + + for (i = 0; i < test_data.nr_cpus; i++) + its_send_invall_cmd(test_data.cmdq_base_va, i); +} + +static void guest_setup_gic(void) +{ + static atomic_int nr_cpus_ready = 0; + u32 cpuid = guest_get_vcpuid(); + + gic_init(GIC_V3, test_data.nr_cpus); + gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K, + test_data.lpi_pend_tables + (cpuid * SZ_64K)); + + atomic_fetch_add(&nr_cpus_ready, 1); + + if (cpuid > 0) + return; + + while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus) + cpu_relax(); + + its_init(test_data.collection_table, SZ_64K, + test_data.device_table, SZ_64K, + test_data.cmdq_base, SZ_64K); + + guest_setup_its_mappings(); + guest_invalidate_all_rdists(); + + /* SYNC to ensure ITS setup is complete */ + for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++) + its_send_sync_cmd(test_data.cmdq_base_va, cpuid); +} + +static void guest_code(size_t nr_lpis) +{ + guest_setup_gic(); + local_irq_enable(); + + GUEST_SYNC(0); + + /* + * Don't use WFI here to avoid blocking the vCPU thread indefinitely and + * never getting the stop signal. + */ + while (!READ_ONCE(test_data.request_vcpus_stop)) + cpu_relax(); + + GUEST_DONE(); +} + +static void setup_memslot(void) +{ + size_t pages; + size_t sz; + + /* + * For the ITS: + * - A single level device table + * - A single level collection table + * - The command queue + * - An ITT for each device + */ + sz = (3 + test_data.nr_devices) * SZ_64K; + + /* + * For the redistributors: + * - A shared LPI configuration table + * - An LPI pending table for each vCPU + */ + sz += (1 + test_data.nr_cpus) * SZ_64K; + + pages = sz / vm->page_size; + gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz; + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base, + TEST_MEMSLOT_INDEX, pages, 0); +} + +#define LPI_PROP_DEFAULT_PRIO 0xa0 + +static void configure_lpis(void) +{ + size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids; + u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table); + size_t i; + + for (i = 0; i < nr_lpis; i++) { + tbl[i] = LPI_PROP_DEFAULT_PRIO | + LPI_PROP_GROUP1 | + LPI_PROP_ENABLED; + } +} + +static void setup_test_data(void) +{ + size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + vm_paddr_t cmdq_base; + + test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, + TEST_MEMSLOT_INDEX); + virt_map(vm, cmdq_base, cmdq_base, pages_per_64k); + test_data.cmdq_base = cmdq_base; + test_data.cmdq_base_va = (void *)cmdq_base; + + test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices, + gpa_base, TEST_MEMSLOT_INDEX); + + test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, TEST_MEMSLOT_INDEX); + configure_lpis(); + + test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus, + gpa_base, TEST_MEMSLOT_INDEX); + + sync_global_to_guest(vm, test_data); +} + +static void setup_gic(void) +{ + its_fd = vgic_its_setup(vm); +} + +static void signal_lpi(u32 device_id, u32 event_id) +{ + vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; + + struct kvm_msi msi = { + .address_lo = db_addr, + .address_hi = db_addr >> 32, + .data = event_id, + .devid = device_id, + .flags = KVM_MSI_VALID_DEVID, + }; + + /* + * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM, + * which for arm64 implies having a valid translation in the ITS. + */ + TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1, + "KVM_SIGNAL_MSI ioctl failed"); +} + +static pthread_barrier_t test_setup_barrier; + +static void *lpi_worker_thread(void *data) +{ + u32 device_id = (size_t)data; + u32 event_id; + size_t i; + + pthread_barrier_wait(&test_setup_barrier); + + for (i = 0; i < nr_iterations; i++) + for (event_id = 0; event_id < test_data.nr_event_ids; event_id++) + signal_lpi(device_id, event_id); + + return NULL; +} + +static void *vcpu_worker_thread(void *data) +{ + struct kvm_vcpu *vcpu = data; + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + pthread_barrier_wait(&test_setup_barrier); + continue; + case UCALL_DONE: + return NULL; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall: %lu", uc.cmd); + } + } + + return NULL; +} + +static void report_stats(struct timespec delta) +{ + double nr_lpis; + double time; + + nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations; + + time = delta.tv_sec; + time += ((double)delta.tv_nsec) / NSEC_PER_SEC; + + pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time); +} + +static void run_test(void) +{ + u32 nr_devices = test_data.nr_devices; + u32 nr_vcpus = test_data.nr_cpus; + pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t)); + pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t)); + struct timespec start, delta; + size_t i; + + TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays"); + + pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1); + + for (i = 0; i < nr_vcpus; i++) + pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]); + + for (i = 0; i < nr_devices; i++) + pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i); + + pthread_barrier_wait(&test_setup_barrier); + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < nr_devices; i++) + pthread_join(lpi_threads[i], NULL); + + delta = timespec_elapsed(start); + write_guest_global(vm, test_data.request_vcpus_stop, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(vcpu_threads[i], NULL); + + report_stats(delta); +} + +static void setup_vm(void) +{ + int i; + + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); + + vm_init_descriptor_tables(vm); + for (i = 0; i < test_data.nr_cpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + setup_memslot(); + + setup_gic(); + + setup_test_data(); +} + +static void destroy_vm(void) +{ + close(its_fd); + kvm_vm_free(vm); + free(vcpus); +} + +static void pr_usage(const char *name) +{ + pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name); + pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus); + pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices); + pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids); + pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations); +} + +int main(int argc, char **argv) +{ + u32 nr_threads; + int c; + + TEST_REQUIRE(kvm_supports_vgic_v3()); + + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { + switch (c) { + case 'v': + test_data.nr_cpus = atoi(optarg); + break; + case 'd': + test_data.nr_devices = atoi(optarg); + break; + case 'e': + test_data.nr_event_ids = atoi(optarg); + break; + case 'i': + nr_iterations = strtoul(optarg, NULL, 0); + break; + case 'h': + default: + pr_usage(argv[0]); + return 1; + } + } + + nr_threads = test_data.nr_cpus + test_data.nr_devices; + if (nr_threads > get_nprocs()) + pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n", + nr_threads, get_nprocs()); + + setup_vm(); + + run_test(); + + destroy_vm(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c new file mode 100644 index 000000000000..ae36325c022f --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vpmu_counter_access - Test vPMU event counter access + * + * Copyright (c) 2023 Google LLC. + * + * This test checks if the guest can see the same number of the PMU event + * counters (PMCR_EL0.N) that userspace sets, if the guest can access + * those counters, and if the guest is prevented from accessing any + * other counters. + * It also checks if the userspace accesses to the PMU regsisters honor the + * PMCR.N value that's set for the guest. + * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host. + */ +#include <kvm_util.h> +#include <processor.h> +#include <test_util.h> +#include <vgic.h> +#include <perf/arm_pmuv3.h> +#include <linux/bitfield.h> + +/* The max number of the PMU event counters (excluding the cycle counter) */ +#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1) + +/* The cycle counter bit position that's common among the PMU registers */ +#define ARMV8_PMU_CYCLE_IDX 31 + +struct vpmu_vm { + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; +}; + +static struct vpmu_vm vpmu_vm; + +struct pmreg_sets { + uint64_t set_reg_id; + uint64_t clr_reg_id; +}; + +#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr} + +static uint64_t get_pmcr_n(uint64_t pmcr) +{ + return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); +} + +static uint64_t get_counters_mask(uint64_t n) +{ + uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); + + if (n) + mask |= GENMASK(n - 1, 0); + return mask; +} + +/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */ +static inline unsigned long read_sel_evcntr(int sel) +{ + write_sysreg(sel, pmselr_el0); + isb(); + return read_sysreg(pmxevcntr_el0); +} + +/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */ +static inline void write_sel_evcntr(int sel, unsigned long val) +{ + write_sysreg(sel, pmselr_el0); + isb(); + write_sysreg(val, pmxevcntr_el0); + isb(); +} + +/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */ +static inline unsigned long read_sel_evtyper(int sel) +{ + write_sysreg(sel, pmselr_el0); + isb(); + return read_sysreg(pmxevtyper_el0); +} + +/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */ +static inline void write_sel_evtyper(int sel, unsigned long val) +{ + write_sysreg(sel, pmselr_el0); + isb(); + write_sysreg(val, pmxevtyper_el0); + isb(); +} + +static void pmu_disable_reset(void) +{ + uint64_t pmcr = read_sysreg(pmcr_el0); + + /* Reset all counters, disabling them */ + pmcr &= ~ARMV8_PMU_PMCR_E; + write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0); + isb(); +} + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); + isb(); +} + +#define READ_PMEVTYPERN(n) \ + return read_sysreg(pmevtyper##n##_el0) +static unsigned long read_pmevtypern(int n) +{ + PMEVN_SWITCH(n, READ_PMEVTYPERN); + return 0; +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); + isb(); +} + +/* + * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0 + * accessors that test cases will use. Each of the accessors will + * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0 + * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through + * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()). + * + * This is used to test that combinations of those accessors provide + * the consistent behavior. + */ +struct pmc_accessor { + /* A function to be used to read PMEVTCNTR<n>_EL0 */ + unsigned long (*read_cntr)(int idx); + /* A function to be used to write PMEVTCNTR<n>_EL0 */ + void (*write_cntr)(int idx, unsigned long val); + /* A function to be used to read PMEVTYPER<n>_EL0 */ + unsigned long (*read_typer)(int idx); + /* A function to be used to write PMEVTYPER<n>_EL0 */ + void (*write_typer)(int idx, unsigned long val); +}; + +struct pmc_accessor pmc_accessors[] = { + /* test with all direct accesses */ + { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern }, + /* test with all indirect accesses */ + { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper }, + /* read with direct accesses, and write with indirect accesses */ + { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper }, + /* read with indirect accesses, and write with direct accesses */ + { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern }, +}; + +/* + * Convert a pointer of pmc_accessor to an index in pmc_accessors[], + * assuming that the pointer is one of the entries in pmc_accessors[]. + */ +#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0]) + +#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \ +{ \ + uint64_t _tval = read_sysreg(regname); \ + \ + if (set_expected) \ + __GUEST_ASSERT((_tval & mask), \ + "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \ + _tval, mask, set_expected); \ + else \ + __GUEST_ASSERT(!(_tval & mask), \ + "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \ + _tval, mask, set_expected); \ +} + +/* + * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers + * are set or cleared as specified in @set_expected. + */ +static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) +{ + GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected); +} + +/* + * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding + * to the specified counter (@pmc_idx) can be read/written as expected. + * When @set_op is true, it tries to set the bit for the counter in + * those registers by writing the SET registers (the bit won't be set + * if the counter is not implemented though). + * Otherwise, it tries to clear the bits in the registers by writing + * the CLR registers. + * Then, it checks if the values indicated in the registers are as expected. + */ +static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) +{ + uint64_t pmcr_n, test_bit = BIT(pmc_idx); + bool set_expected = false; + + if (set_op) { + write_sysreg(test_bit, pmcntenset_el0); + write_sysreg(test_bit, pmintenset_el1); + write_sysreg(test_bit, pmovsset_el0); + + /* The bit will be set only if the counter is implemented */ + pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0)); + set_expected = (pmc_idx < pmcr_n) ? true : false; + } else { + write_sysreg(test_bit, pmcntenclr_el0); + write_sysreg(test_bit, pmintenclr_el1); + write_sysreg(test_bit, pmovsclr_el0); + } + check_bitmap_pmu_regs(test_bit, set_expected); +} + +/* + * Tests for reading/writing registers for the (implemented) event counter + * specified by @pmc_idx. + */ +static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) +{ + uint64_t write_data, read_data; + + /* Disable all PMCs and reset all PMCs to zero. */ + pmu_disable_reset(); + + /* + * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1. + */ + + /* Make sure that the bit in those registers are set to 0 */ + test_bitmap_pmu_regs(pmc_idx, false); + /* Test if setting the bit in those registers works */ + test_bitmap_pmu_regs(pmc_idx, true); + /* Test if clearing the bit in those registers works */ + test_bitmap_pmu_regs(pmc_idx, false); + + /* + * Tests for reading/writing the event type register. + */ + + /* + * Set the event type register to an arbitrary value just for testing + * of reading/writing the register. + * Arm ARM says that for the event from 0x0000 to 0x003F, + * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is + * the value written to the field even when the specified event + * is not supported. + */ + write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED); + acc->write_typer(pmc_idx, write_data); + read_data = acc->read_typer(pmc_idx); + __GUEST_ASSERT(read_data == write_data, + "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); + + /* + * Tests for reading/writing the event count register. + */ + + read_data = acc->read_cntr(pmc_idx); + + /* The count value must be 0, as it is disabled and reset */ + __GUEST_ASSERT(read_data == 0, + "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data); + + write_data = read_data + pmc_idx + 0x12345; + acc->write_cntr(pmc_idx, write_data); + read_data = acc->read_cntr(pmc_idx); + __GUEST_ASSERT(read_data == write_data, + "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); +} + +#define INVALID_EC (-1ul) +uint64_t expected_ec = INVALID_EC; + +static void guest_sync_handler(struct ex_regs *regs) +{ + uint64_t esr, ec; + + esr = read_sysreg(esr_el1); + ec = ESR_ELx_EC(esr); + + __GUEST_ASSERT(expected_ec == ec, + "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", + regs->pc, esr, ec, expected_ec); + + /* skip the trapping instruction */ + regs->pc += 4; + + /* Use INVALID_EC to indicate an exception occurred */ + expected_ec = INVALID_EC; +} + +/* + * Run the given operation that should trigger an exception with the + * given exception class. The exception handler (guest_sync_handler) + * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip + * the instruction that trapped. + */ +#define TEST_EXCEPTION(ec, ops) \ +({ \ + GUEST_ASSERT(ec != INVALID_EC); \ + WRITE_ONCE(expected_ec, ec); \ + dsb(ish); \ + ops; \ + GUEST_ASSERT(expected_ec == INVALID_EC); \ +}) + +/* + * Tests for reading/writing registers for the unimplemented event counter + * specified by @pmc_idx (>= PMCR_EL0.N). + */ +static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) +{ + /* + * Reading/writing the event count/type registers should cause + * an UNDEFINED exception. + */ + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + /* + * The bit corresponding to the (unimplemented) counter in + * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. + */ + test_bitmap_pmu_regs(pmc_idx, 1); + test_bitmap_pmu_regs(pmc_idx, 0); +} + +/* + * The guest is configured with PMUv3 with @expected_pmcr_n number of + * event counters. + * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and + * if reading/writing PMU registers for implemented or unimplemented + * counters works as expected. + */ +static void guest_code(uint64_t expected_pmcr_n) +{ + uint64_t pmcr, pmcr_n, unimp_mask; + int i, pmc; + + __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS, + "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x", + expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS); + + pmcr = read_sysreg(pmcr_el0); + pmcr_n = get_pmcr_n(pmcr); + + /* Make sure that PMCR_EL0.N indicates the value userspace set */ + __GUEST_ASSERT(pmcr_n == expected_pmcr_n, + "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx", + expected_pmcr_n, pmcr_n); + + /* + * Make sure that (RAZ) bits corresponding to unimplemented event + * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset + * to zero. + * (NOTE: bits for implemented event counters are reset to UNKNOWN) + */ + unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n); + check_bitmap_pmu_regs(unimp_mask, false); + + /* + * Tests for reading/writing PMU registers for implemented counters. + * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions. + */ + for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { + for (pmc = 0; pmc < pmcr_n; pmc++) + test_access_pmc_regs(&pmc_accessors[i], pmc); + } + + /* + * Tests for reading/writing PMU registers for unimplemented counters. + * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions. + */ + for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { + for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++) + test_access_invalid_pmc_regs(&pmc_accessors[i], pmc); + } + + GUEST_DONE(); +} + +/* Create a VM that has one vCPU with PMUv3 configured. */ +static void create_vpmu_vm(void *guest_code) +{ + struct kvm_vcpu_init init; + uint8_t pmuver, ec; + uint64_t dfr0, irq = 23; + struct kvm_device_attr irq_attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .attr = KVM_ARM_VCPU_PMU_V3_IRQ, + .addr = (uint64_t)&irq, + }; + + /* The test creates the vpmu_vm multiple times. Ensure a clean state */ + memset(&vpmu_vm, 0, sizeof(vpmu_vm)); + + vpmu_vm.vm = vm_create(1); + vm_init_descriptor_tables(vpmu_vm.vm); + for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { + vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, + guest_sync_handler); + } + + /* Create vCPU with PMUv3 */ + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); + vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); + vcpu_init_descriptor_tables(vpmu_vm.vcpu); + + kvm_arch_vm_finalize_vcpus(vpmu_vm.vm); + + /* Make sure that PMUv3 support is indicated in the ID register */ + dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); + pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0); + TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && + pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, + "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); + + vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); +} + +static void destroy_vpmu_vm(void) +{ + kvm_vm_free(vpmu_vm.vm); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) +{ + struct ucall uc; + + vcpu_args_set(vcpu, 1, pmcr_n); + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } +} + +static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail) +{ + struct kvm_vcpu *vcpu; + unsigned int prev; + int ret; + + create_vpmu_vm(guest_code); + vcpu = vpmu_vm.vcpu; + + prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0))); + + ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters); + + if (expect_fail) + TEST_ASSERT(ret && errno == EINVAL, + "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded", + nr_counters, prev); + else + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); + + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL); +} + +/* + * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n, + * and run the test. + */ +static void run_access_test(uint64_t pmcr_n) +{ + uint64_t sp; + struct kvm_vcpu *vcpu; + struct kvm_vcpu_init init; + + pr_debug("Test with pmcr_n %lu\n", pmcr_n); + + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); + vcpu = vpmu_vm.vcpu; + + /* Save the initial sp to restore them later to run the guest again */ + sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1)); + + run_vcpu(vcpu, pmcr_n); + + /* + * Reset and re-initialize the vCPU, and run the guest code again to + * check if PMCR_EL0.N is preserved. + */ + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); + aarch64_vcpu_setup(vcpu, &init); + vcpu_init_descriptor_tables(vcpu); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + + run_vcpu(vcpu, pmcr_n); + + destroy_vpmu_vm(); +} + +static struct pmreg_sets validity_check_reg_sets[] = { + PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0), + PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1), + PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0), +}; + +/* + * Create a VM, and check if KVM handles the userspace accesses of + * the PMU register sets in @validity_check_reg_sets[] correctly. + */ +static void run_pmregs_validity_test(uint64_t pmcr_n) +{ + int i; + struct kvm_vcpu *vcpu; + uint64_t set_reg_id, clr_reg_id, reg_val; + uint64_t valid_counters_mask, max_counters_mask; + + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); + vcpu = vpmu_vm.vcpu; + + valid_counters_mask = get_counters_mask(pmcr_n); + max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS); + + for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) { + set_reg_id = validity_check_reg_sets[i].set_reg_id; + clr_reg_id = validity_check_reg_sets[i].clr_reg_id; + + /* + * Test if the 'set' and 'clr' variants of the registers + * are initialized based on the number of valid counters. + */ + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(set_reg_id), reg_val); + + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(clr_reg_id), reg_val); + + /* + * Using the 'set' variant, force-set the register to the + * max number of possible counters and test if KVM discards + * the bits for unimplemented counters as it should. + */ + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); + + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(set_reg_id), reg_val); + + reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id)); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", + KVM_ARM64_SYS_REG(clr_reg_id), reg_val); + } + + destroy_vpmu_vm(); +} + +/* + * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for + * the vCPU to @pmcr_n, which is larger than the host value. + * The attempt should fail as @pmcr_n is too big to set for the vCPU. + */ +static void run_error_test(uint64_t pmcr_n) +{ + pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); + + test_create_vpmu_vm_with_nr_counters(pmcr_n, true); + destroy_vpmu_vm(); +} + +/* + * Return the default number of implemented PMU event counters excluding + * the cycle counter (i.e. PMCR_EL0.N value) for the guest. + */ +static uint64_t get_pmcr_n_limit(void) +{ + uint64_t pmcr; + + create_vpmu_vm(guest_code); + pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + destroy_vpmu_vm(); + return get_pmcr_n(pmcr); +} + +static bool kvm_supports_nr_counters_attr(void) +{ + bool supported; + + create_vpmu_vm(NULL); + supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS); + destroy_vpmu_vm(); + + return supported; +} + +int main(void) +{ + uint64_t i, pmcr_n; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + TEST_REQUIRE(kvm_supports_vgic_v3()); + TEST_REQUIRE(kvm_supports_nr_counters_attr()); + + pmcr_n = get_pmcr_n_limit(); + for (i = 0; i <= pmcr_n; i++) { + run_access_test(i); + run_pmregs_validity_test(i); + } + + for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++) + run_error_test(i); + + return 0; +} diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c new file mode 100644 index 000000000000..60cb25454899 --- /dev/null +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/sizes.h> + +#include <kvm_util.h> +#include <processor.h> + +#include "ucall_common.h" + +struct kvm_coalesced_io { + struct kvm_coalesced_mmio_ring *ring; + uint32_t ring_size; + uint64_t mmio_gpa; + uint64_t *mmio; + + /* + * x86-only, but define pio_port for all architectures to minimize the + * amount of #ifdeffery and complexity, without having to sacrifice + * verbose error messages. + */ + uint8_t pio_port; +}; + +static struct kvm_coalesced_io kvm_builtin_io_ring; + +#ifdef __x86_64__ +static const int has_pio = 1; +#else +static const int has_pio = 0; +#endif + +static void guest_code(struct kvm_coalesced_io *io) +{ + int i, j; + + for (;;) { + for (j = 0; j < 1 + has_pio; j++) { + /* + * KVM always leaves one free entry, i.e. exits to + * userspace before the last entry is filled. + */ + for (i = 0; i < io->ring_size - 1; i++) { +#ifdef __x86_64__ + if (i & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } +#ifdef __x86_64__ + if (j & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } + GUEST_SYNC(0); + + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); +#ifdef __x86_64__ + outl(io->pio_port, io->pio_port + i); +#endif + } +} + +static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + const bool want_pio = expected_exit == KVM_EXIT_IO; + struct kvm_coalesced_mmio_ring *ring = io->ring; + struct kvm_run *run = vcpu->run; + uint32_t pio_value; + + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + + vcpu_run(vcpu); + + /* + * Annoyingly, reading PIO data is safe only for PIO exits, otherwise + * data_offset is garbage, e.g. an MMIO gpa. + */ + if (run->exit_reason == KVM_EXIT_IO) + pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + else + pio_value = 0; + + TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && + run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && + *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && + run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && + pio_value == io->pio_port + io->ring_size - 1)), + "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n " + "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n " + "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x", + ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO", + want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, + (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, + run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other", + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); +} + +static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + int i; + + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit); + + TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first, + "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u", + ring->first, ring->last, io->ring_size, ring_start); + + for (i = 0; i < io->ring_size - 1; i++) { + uint32_t idx = (ring->first + i) % io->ring_size; + struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; + +#ifdef __x86_64__ + if (i & 1) + TEST_ASSERT(entry->phys_addr == io->pio_port && + entry->len == 4 && entry->pio && + *(uint32_t *)entry->data == io->pio_port + i, + "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", + io->pio_port, io->pio_port + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint32_t *)entry->data); + else +#endif + TEST_ASSERT(entry->phys_addr == io->mmio_gpa && + entry->len == 8 && !entry->pio, + "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", + io->mmio_gpa, io->mmio_gpa + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint64_t *)entry->data); + } +} + +static void test_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, uint32_t ring_start) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + + kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); +#ifdef __x86_64__ + kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); +#endif + + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO); +#ifdef __x86_64__ + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO); +#endif + + /* + * Verify ucall, which may use non-coalesced MMIO or PIO, generates an + * immediate exit. + */ + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(ring->first, ring_start); + TEST_ASSERT_EQ(ring->last, ring_start); + + /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO); + +#ifdef __x86_64__ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO); +#endif +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO)); + +#ifdef __x86_64__ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO)); +#endif + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + kvm_builtin_io_ring = (struct kvm_coalesced_io) { + /* + * The I/O ring is a kernel-allocated page whose address is + * relative to each vCPU's run page, with the page offset + * provided by KVM in the return of KVM_CAP_COALESCED_MMIO. + */ + .ring = (void *)vcpu->run + + (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()), + + /* + * The size of the I/O ring is fixed, but KVM defines the sized + * based on the kernel's PAGE_SIZE. Thus, userspace must query + * the host's page size at runtime to compute the ring size. + */ + .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) / + sizeof(struct kvm_coalesced_mmio), + + /* + * Arbitrary address+port (MMIO mustn't overlap memslots), with + * the MMIO GPA identity mapped in the guest. + */ + .mmio_gpa = 4ull * SZ_1G, + .mmio = (uint64_t *)(4ull * SZ_1G), + .pio_port = 0x80, + }; + + virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + + sync_global_to_guest(vm, kvm_builtin_io_ring); + vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); + + for (i = 0; i < kvm_builtin_io_ring.ring_size; i++) + test_coalesced_io(vcpu, &kvm_builtin_io_ring, i); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 09c116a82a84..0202b78f8680 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -6,14 +6,10 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019, Google, Inc. */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <time.h> -#include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> #include <sys/syscall.h> @@ -22,6 +18,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #include "userfaultfd_util.h" #ifdef __NR_userfaultfd @@ -45,10 +42,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory */ ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) != UCALL_SYNC) { TEST_ASSERT(false, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); } @@ -77,8 +74,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, copy.mode = 0; r = ioctl(uffd, UFFDIO_COPY, ©); - if (r == -1) { - pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", + /* + * With multiple vCPU threads fault on a single page and there are + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs + * will fail with EEXIST: handle that case without signaling an + * error. + * + * Note that this also suppress any EEXISTs occurring from, + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never + * happens here, but a realistic VMM might potentially maintain + * some external state to correctly surface EEXISTs to userspace + * (or prevent duplicate COPY/CONTINUEs in the first place). + */ + if (r == -1 && errno != EEXIST) { + pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n", addr, tid, errno); return r; } @@ -89,8 +98,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, cont.range.len = demand_paging_size; r = ioctl(uffd, UFFDIO_CONTINUE, &cont); - if (r == -1) { - pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", + /* + * With multiple vCPU threads fault on a single page and there are + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs + * will fail with EEXIST: handle that case without signaling an + * error. + * + * Note that this also suppress any EEXISTs occurring from, + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never + * happens here, but a realistic VMM might potentially maintain + * some external state to correctly surface EEXISTs to userspace + * (or prevent duplicate COPY/CONTINUEs in the first place). + */ + if (r == -1 && errno != EEXIST) { + pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n", addr, tid, errno); return r; } @@ -110,7 +131,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, struct test_params { int uffd_mode; + bool single_uffd; useconds_t uffd_delay; + int readers_per_uffd; enum vm_mem_backing_src_type src_type; bool partition_vcpu_memory_access; }; @@ -131,10 +154,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct memstress_vcpu_args *vcpu_args; struct test_params *p = arg; struct uffd_desc **uffd_descs = NULL; + uint64_t uffd_region_size; struct timespec start; struct timespec ts_diff; + double vcpu_paging_rate; struct kvm_vm *vm; - int i; + int i, num_uffds = 0; vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); @@ -147,7 +172,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { - for (i = 0; i < nr_vcpus; i++) { + num_uffds = p->single_uffd ? 1 : nr_vcpus; + for (i = 0; i < num_uffds; i++) { vcpu_args = &memstress_args.vcpu_args[i]; prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), vcpu_args->pages * memstress_args.guest_page_size); @@ -155,9 +181,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) } if (p->uffd_mode) { - uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + num_uffds = p->single_uffd ? 1 : nr_vcpus; + uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds; + + uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *)); TEST_ASSERT(uffd_descs, "Memory allocation failed"); - for (i = 0; i < nr_vcpus; i++) { + for (i = 0; i < num_uffds; i++) { + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; vcpu_args = &memstress_args.vcpu_args[i]; @@ -170,7 +200,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) */ uffd_descs[i] = uffd_setup_demand_paging( p->uffd_mode, p->uffd_delay, vcpu_hva, - vcpu_args->pages * memstress_args.guest_page_size, + uffd_region_size, + p->readers_per_uffd, &handle_uffd_page_request); } } @@ -187,15 +218,19 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (p->uffd_mode) { /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) + for (i = 0; i < num_uffds; i++) uffd_stop_demand_paging(uffd_descs[i]); } - pr_info("Total guest execution time: %ld.%.9lds\n", + pr_info("Total guest execution time:\t%ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - pr_info("Overall demand paging rate: %f pgs/sec\n", - memstress_args.vcpu_args[0].pages * nr_vcpus / - ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC)); + + vcpu_paging_rate = memstress_args.vcpu_args[0].pages / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC); + pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n", + vcpu_paging_rate); + pr_info("Overall demand paging rate:\t%f pgs/sec\n", + vcpu_paging_rate * nr_vcpus); memstress_destroy_vm(vm); @@ -207,15 +242,20 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" - " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); + printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n" + " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n" + " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); guest_modes_help(); printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); kvm_print_vcpu_pinning_help(); + printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n" + " creating one for each region paged by a unique vCPU\n" + " Set implicitly with -o, and no effect without -u.\n"); printf(" -d: add a delay in usec to the User Fault\n" " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); + printf(" -r: Set the number of reader threads per uffd.\n"); printf(" -b: specify the size of the memory region which should be\n" " demand paged by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -234,12 +274,14 @@ int main(int argc, char *argv[]) struct test_params p = { .src_type = DEFAULT_VM_MEM_SRC, .partition_vcpu_memory_access = true, + .readers_per_uffd = 1, + .single_uffd = false, }; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) { + while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -251,6 +293,9 @@ int main(int argc, char *argv[]) p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); break; + case 'a': + p.single_uffd = true; + break; case 'd': p.uffd_delay = strtoul(optarg, NULL, 0); TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); @@ -271,6 +316,13 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + p.single_uffd = true; + break; + case 'r': + p.readers_per_uffd = atoi(optarg); + TEST_ASSERT(p.readers_per_uffd >= 1, + "Invalid number of readers per uffd %d: must be >=1", + p.readers_per_uffd); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d374dbcf9a53..0a1ea1d1e2d8 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,41 +18,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" - -#ifdef __aarch64__ -#include "aarch64/vgic.h" - -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - -static int gic_fd; - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ - /* - * The test can still run even if hardware does not support GICv3, as it - * is only an optimization to reduce guest exits. - */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ - if (gic_fd > 0) - close(gic_fd); -} - -#else /* __aarch64__ */ - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ -} - -#endif +#include "ucall_common.h" /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -88,9 +54,9 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu_idx); @@ -132,7 +98,6 @@ struct test_params { enum vm_mem_backing_src_type backing_src; int slots; uint32_t write_percent; - uint32_t random_seed; bool random_access; }; @@ -156,8 +121,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) p->slots, p->backing_src, p->partition_vcpu_memory_access); - pr_info("Random seed: %u\n", p->random_seed); - memstress_set_random_seed(vm, p->random_seed); memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; @@ -171,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, dirty_log_manual_caps); - arch_setup_vm(vm, nr_vcpus); - /* Start the iterations */ iteration = 0; host_quit = false; @@ -290,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } memstress_free_bitmaps(bitmaps, p->slots); - arch_cleanup_vm(vm); memstress_destroy_vm(vm); } @@ -346,11 +306,13 @@ int main(int argc, char *argv[]) .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, - .random_seed = 1, .write_percent = 100, }; int opt; + /* Override the seed to be deterministic by default. */ + guest_random_seed = 1; + dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | @@ -395,7 +357,7 @@ int main(int argc, char *argv[]) p.phys_offset = strtoull(optarg, NULL, 0); break; case 'r': - p.random_seed = atoi_positive("Random seed", optarg); + guest_random_seed = atoi_positive("Random seed", optarg); break; case 's': p.backing_src = parse_backing_src_type(optarg); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 936f3a8d1b83..d58a641b0e6a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Red Hat, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <pthread.h> @@ -23,6 +20,7 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 @@ -33,15 +31,18 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 - /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ #define TEST_HOST_LOOP_N 32UL /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* + * Ensure the vCPU is able to perform a reasonable number of writes in each + * iteration to provide a lower bound on coverage. + */ +#define TEST_MIN_WRITES_PER_ITERATION 0x100 + /* Dirty bitmaps are always little endian, so we need to swap on big endian */ #if defined(__s390x__) # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) @@ -76,8 +77,9 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; -static uint64_t random_array[TEST_PAGES_PER_LOOP]; static uint64_t iteration; +static uint64_t nr_writes; +static bool vcpu_stop; /* * Guest physical memory offset of the testing memory slot. @@ -99,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static void guest_code(void) { uint64_t addr; - int i; + +#ifdef __s390x__ + uint64_t i; /* * On s390x, all pages of a 1M segment are initially marked as dirty @@ -109,19 +113,22 @@ static void guest_code(void) */ for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; - *(uint64_t *)addr = READ_ONCE(iteration); + vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } +#endif while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + while (!READ_ONCE(vcpu_stop)) { addr = guest_test_virt_mem; - addr += (READ_ONCE(random_array[i]) % guest_num_pages) + addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); - *(uint64_t *)addr = READ_ONCE(iteration); + + vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } - /* Tell the host that we need more random numbers */ GUEST_SYNC(1); } } @@ -136,25 +143,18 @@ static uint64_t host_num_pages; /* For statistics only */ static uint64_t host_dirty_count; static uint64_t host_clear_count; -static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; static sem_t sem_vcpu_cont; -/* - * This is only set by main thread, and only cleared by vcpu thread. It is - * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC - * is the only place that we'll guarantee both "dirty bit" and "dirty data" - * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted - * after setting dirty bit but before the data is written. - */ -static atomic_t vcpu_sync_stop_requested; + /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; + /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -169,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full; * dirty gfn we've collected, so that if a mismatch of data found later in the * verifying process, we let it pass. */ -static uint64_t dirty_ring_last_page; +static uint64_t dirty_ring_last_page = -1ULL; + +/* + * In addition to the above, it is possible (especially if this + * test is run nested) for the above scenario to repeat multiple times: + * + * The following can happen: + * + * - L1 vCPU: Memory write is logged to PML but not committed. + * + * - L1 test thread: Ignores the write because its last dirty ring entry + * Resets the dirty ring which: + * - Resets the A/D bits in EPT + * - Issues tlb flush (invept), which is intercepted by L0 + * + * - L0: frees the whole nested ept mmu root as the response to invept, + * and thus ensures that when memory write is retried, it will fault again + * + * - L1 vCPU: Same memory write is logged to the PML but not committed again. + * + * - L1 test thread: Ignores the write because its last dirty ring entry (again) + * Resets the dirty ring which: + * - Resets the A/D bits in EPT (again) + * - Issues tlb flush (again) which is intercepted by L0 + * + * ... + * + * N times + * + * - L1 vCPU: Memory write is logged in the PML and then committed. + * Lots of other memory writes are logged and committed. + * ... + * + * - L1 test thread: Sees the memory write along with other memory writes + * in the dirty ring, and since the write is usually not + * the last entry in the dirty-ring and has a very outdated + * iteration, the test fails. + * + * + * Note that this is only possible when the write was the last log entry + * write during iteration N-1, thus remember last iteration last log entry + * and also don't fail when it is reported in the next iteration, together with + * an outdated iteration count. + */ +static uint64_t dirty_ring_prev_iteration_last_page; enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ @@ -194,24 +238,6 @@ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; -static void vcpu_kick(void) -{ - pthread_kill(vcpu_thread, SIG_IPI); -} - -/* - * In our test we do signal tricks, let's use a better version of - * sem_wait to avoid signal interrupts - */ -static void sem_wait_until(sem_t *sem) -{ - int ret; - - do - ret = sem_wait(sem); - while (ret == -1 && errno == EINTR); -} - static bool clear_log_supported(void) { return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); @@ -246,23 +272,18 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Should only be called after a GUEST_SYNC */ static void vcpu_handle_sync_stop(void) { - if (atomic_read(&vcpu_sync_stop_requested)) { - /* It means main thread is sleeping waiting */ - atomic_set(&vcpu_sync_stop_requested, false); + if (READ_ONCE(vcpu_stop)) { sem_post(&sem_vcpu_stop); - sem_wait_until(&sem_vcpu_cont); + sem_wait(&sem_vcpu_cont); } } -static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void default_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), - "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); vcpu_handle_sync_stop(); @@ -327,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, "%u != %u", cur->slot, slot); TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); - //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); @@ -338,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, return count; } -static void dirty_ring_wait_vcpu(void) -{ - /* This makes sure that hardware PML cache flushed */ - vcpu_kick(); - sem_wait_until(&sem_vcpu_stop); -} - -static void dirty_ring_continue_vcpu(void) -{ - pr_info("Notifying vcpu to continue\n"); - sem_post(&sem_vcpu_cont); -} - static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx) { - uint32_t count = 0, cleared; - bool continued_vcpu = false; - - dirty_ring_wait_vcpu(); - - if (!dirty_ring_vcpu_ring_full) { - /* - * This is not a ring-full event, it's safe to allow - * vcpu to continue - */ - dirty_ring_continue_vcpu(); - continued_vcpu = true; - } + uint32_t count, cleared; /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), @@ -376,51 +371,31 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - - if (!continued_vcpu) { - TEST_ASSERT(dirty_ring_vcpu_ring_full, - "Didn't continue vcpu even without ring full"); - dirty_ring_continue_vcpu(); - } - - pr_info("Iteration %ld collected %u pages\n", iteration, count); } -static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; /* A ucall-sync or ring-full event is allowed */ if (get_ucall(vcpu, NULL) == UCALL_SYNC) { - /* We should allow this to continue */ - ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || - (ret == -1 && err == EINTR)) { - /* Update the flag first before pause */ - WRITE_ONCE(dirty_ring_vcpu_ring_full, - run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&sem_vcpu_stop); - pr_info("vcpu stops because %s...\n", - dirty_ring_vcpu_ring_full ? - "dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&sem_vcpu_cont); - pr_info("vcpu continues now.\n"); + vcpu_handle_sync_stop(); + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + WRITE_ONCE(dirty_ring_vcpu_ring_full, true); + vcpu_handle_sync_stop(); } else { TEST_ASSERT(false, "Invalid guest sync status: " - "exit_reason=%s\n", + "exit_reason=%s", exit_reason_str(run->exit_reason)); } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -432,8 +407,7 @@ struct log_mode { void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); + void (*after_vcpu_run)(struct kvm_vcpu *vcpu); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -452,20 +426,10 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -static unsigned long *host_bmap_track; - static void log_modes_dump(void) { int i; @@ -505,191 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } -static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vcpu, ret, err); -} - -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - -static void generate_random_array(uint64_t *guest_array, uint64_t size) -{ - uint64_t i; - - for (i = 0; i < size; i++) - guest_array[i] = random(); + mode->after_vcpu_run(vcpu); } static void *vcpu_worker(void *data) { - int ret; struct kvm_vcpu *vcpu = data; - struct kvm_vm *vm = vcpu->vm; - uint64_t *guest_array; - uint64_t pages_count = 0; - struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) - + sizeof(sigset_t)); - sigset_t *sigset = (sigset_t *) &sigmask->sigset; - - /* - * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the - * ioctl to return with -EINTR, but it is still pending and we need - * to accept it with the sigwait. - */ - sigmask->len = 8; - pthread_sigmask(0, NULL, sigset); - sigdelset(sigset, SIG_IPI); - vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask); - sigemptyset(sigset); - sigaddset(sigset, SIG_IPI); - - guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); + sem_wait(&sem_vcpu_cont); while (!READ_ONCE(host_quit)) { - /* Clear any existing kick signals */ - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); - pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = __vcpu_run(vcpu); - if (ret == -1 && errno == EINTR) { - int sig = -1; - sigwait(sigset, &sig); - assert(sig == SIG_IPI); - } - log_mode_after_vcpu_run(vcpu, ret, errno); + vcpu_run(vcpu); + log_mode_after_vcpu_run(vcpu); } - pr_info("Dirtied %"PRIu64" pages\n", pages_count); - return NULL; } -static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { + uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0; uint64_t step = vm_num_host_pages(mode, 1); - uint64_t page; - uint64_t *value_ptr; - uint64_t min_iter = 0; for (page = 0; page < host_num_pages; page += step) { - value_ptr = host_test_mem + page * host_page_size; - - /* If this is a special page that we were tracking... */ - if (__test_and_clear_bit_le(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit_le(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } - - if (__test_and_clear_bit_le(page, bmap)) { - bool matched; + uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size); + bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); - host_dirty_count++; + /* + * Ensure both bitmaps are cleared, as a page can be written + * multiple times per iteration, i.e. can show up in both + * bitmaps, and the dirty ring is additive, i.e. doesn't purge + * bitmap entries from previous collections. + */ + if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) { + nr_dirty_pages++; /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. + * If the page is dirty, the value written to memory + * should be the current iteration number. */ - matched = (*value_ptr == iteration || - *value_ptr == iteration - 1); - - if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { - if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { - /* - * Short answer: this case is special - * only for dirty ring test where the - * page is the last page before a kvm - * dirty ring full in iteration N-2. - * - * Long answer: Assuming ring size R, - * one possible condition is: - * - * main thr vcpu thr - * -------- -------- - * iter=1 - * write 1 to page 0~(R-1) - * full, vmexit - * collect 0~(R-1) - * kick vcpu - * write 1 to (R-1)~(2R-2) - * full, vmexit - * iter=2 - * collect (R-1)~(2R-2) - * kick vcpu - * write 1 to (2R-2) - * (NOTE!!! "1" cached in cpu reg) - * write 2 to (2R-1)~(3R-3) - * full, vmexit - * iter=3 - * collect (2R-2)~(3R-3) - * (here if we read value on page - * "2R-2" is 1, while iter=3!!!) - * - * This however can only happen once per iteration. - */ - min_iter = iteration - 1; + if (val == iteration) + continue; + + if (host_log_mode == LOG_MODE_DIRTY_RING) { + /* + * The last page in the ring from previous + * iteration can be written with the value + * from the previous iteration, as the value to + * be written may be cached in a CPU register. + */ + if (page == dirty_ring_prev_iteration_last_page && + val == iteration - 1) continue; - } else if (page == dirty_ring_last_page) { - /* - * Please refer to comments in - * dirty_ring_last_page. - */ + + /* + * Any value from a previous iteration is legal + * for the last entry, as the write may not yet + * have retired, i.e. the page may hold whatever + * it had before this iteration started. + */ + if (page == dirty_ring_last_page && + val < iteration) continue; - } + } else if (!val && iteration == 1 && bmap0_dirty) { + /* + * When testing get+clear, the dirty bitmap + * starts with all bits set, and so the first + * iteration can observe a "dirty" page that + * was never written, but only in the first + * bitmap (collecting the bitmap also clears + * all dirty pages). + */ + continue; } - TEST_ASSERT(matched, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } else { - host_clear_count++; + nr_clean_pages++; /* * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". + * value smaller than the iteration number. */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - __set_bit_le(page, host_bmap_track); - } + TEST_ASSERT(val < iteration, + "Clear page %lu value (%lu) >= iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } } + + pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n", + iteration, nr_dirty_pages, nr_clean_pages, nr_writes); + + host_dirty_count += nr_dirty_pages; + host_clear_count += nr_clean_pages; } static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, @@ -699,10 +581,11 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = __vm_create(mode, 1, extra_mem_pages); + vm = __vm_create(VM_SHAPE(mode), 1, extra_mem_pages); log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -717,8 +600,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - unsigned long *bmap; + unsigned long *bmap[2]; uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -759,12 +643,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); + + /* + * The workaround in guest_code() to write all pages prior to the first + * iteration isn't compatible with the dirty ring, as the dirty ring + * support relies on the vCPU to actually stop when vcpu_stop is set so + * that the vCPU doesn't hang waiting for the dirty ring to be emptied. + */ + TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING, + "Test needs to be updated to support s390 dirty ring"); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - bmap = bitmap_zalloc(host_num_pages); - host_bmap_track = bitmap_zalloc(host_num_pages); + bmap[0] = bitmap_zalloc(host_num_pages); + bmap[1] = bitmap_zalloc(host_num_pages); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -785,30 +678,109 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); - /* Start the iterations */ - iteration = 1; - sync_global_to_guest(vm, iteration); - host_quit = false; host_dirty_count = 0; host_clear_count = 0; - host_track_next_count = 0; - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + WRITE_ONCE(host_quit, false); + + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + + TEST_ASSERT_EQ(vcpu_stop, false); pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - while (iteration < p->iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(p->interval * 1000); - log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages, - &ring_buf_idx); + for (iteration = 1; iteration <= p->iterations; iteration++) { + unsigned long i; + + sync_global_to_guest(vm, iteration); + + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + + dirty_ring_prev_iteration_last_page = dirty_ring_last_page; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + sem_post(&sem_vcpu_cont); + + /* + * Let the vCPU run beyond the configured interval until it has + * performed the minimum number of writes. This verifies the + * guest is making forward progress, e.g. isn't stuck because + * of a KVM bug, and puts a firm floor on test coverage. + */ + for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { + /* + * Sleep in 1ms chunks to keep the interval math simple + * and so that the test doesn't run too far beyond the + * specified interval. + */ + usleep(1000); + + sync_global_from_guest(vm, nr_writes); + + /* + * Reap dirty pages while the guest is running so that + * dirty ring full events are resolved, i.e. so that a + * larger interval doesn't always end up with a vCPU + * that's effectively blocked. Collecting while the + * guest is running also verifies KVM doesn't lose any + * state. + * + * For bitmap modes, KVM overwrites the entire bitmap, + * i.e. collecting the bitmaps is destructive. Collect + * the bitmap only on the first pass, otherwise this + * test would lose track of dirty pages. + */ + if (i && host_log_mode != LOG_MODE_DIRTY_RING) + continue; + + /* + * For the dirty ring, empty the ring on subsequent + * passes only if the ring was filled at least once, + * to verify KVM's handling of a full ring (emptying + * the ring on every pass would make it unlikely the + * vCPU would ever fill the fing). + */ + if (i && !READ_ONCE(dirty_ring_vcpu_ring_full)) + continue; + + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[0], host_num_pages, + &ring_buf_idx); + } + + /* + * Stop the vCPU prior to collecting and verifying the dirty + * log. If the vCPU is allowed to run during collection, then + * pages that are written during this iteration may be missed, + * i.e. collected in the next iteration. And if the vCPU is + * writing memory during verification, pages that this thread + * sees as clean may be written with this iteration's value. + */ + WRITE_ONCE(vcpu_stop, true); + sync_global_to_guest(vm, vcpu_stop); + sem_wait(&sem_vcpu_stop); /* - * See vcpu_sync_stop_requested definition for details on why - * we need to stop vcpu when verify data. + * Clear vcpu_stop after the vCPU thread has acknowledge the + * stop request and is waiting, i.e. is definitely not running! */ - atomic_set(&vcpu_sync_stop_requested, true); - sem_wait_until(&sem_vcpu_stop); + WRITE_ONCE(vcpu_stop, false); + sync_global_to_guest(vm, vcpu_stop); + + /* + * Sync the number of writes performed before verification, the + * info will be printed along with the dirty/clean page counts. + */ + sync_global_from_guest(vm, nr_writes); + /* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; @@ -816,26 +788,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) * the flush of the last page, and since we handle the last * page specially verification will succeed anyway. */ - assert(host_log_mode == LOG_MODE_DIRTY_RING || - atomic_read(&vcpu_sync_stop_requested) == false); + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[1], host_num_pages, + &ring_buf_idx); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - - iteration++; - sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); + WRITE_ONCE(host_quit, true); + sem_post(&sem_vcpu_cont); + pthread_join(vcpu_thread, NULL); - pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", + host_dirty_count, host_clear_count); - free(bmap); - free(host_bmap_track); + free(bmap[0]); + free(bmap[1]); kvm_vm_free(vm); } @@ -869,7 +837,6 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; - sigset_t sigset; sem_init(&sem_vcpu_stop, 0, 0); sem_init(&sem_vcpu_cont, 0, 0); @@ -920,19 +887,12 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero"); TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", p.iterations, p.interval); - srandom(time(0)); - - /* Ensure that vCPU threads start with SIG_IPI blocked. */ - sigemptyset(&sigset); - sigaddset(&sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, &sigset, NULL); - if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c new file mode 100644 index 000000000000..f4644c9d2d3b --- /dev/null +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * The blessed list should be created from the oldest possible kernel. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +static struct kvm_reg_list *reg_list; +static __u64 *blessed_reg, blessed_n; + +extern struct vcpu_reg_list *vcpu_configs[]; +extern int vcpu_configs_n; + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_reg_filtered(i) \ + for_each_reg(i) \ + if (!filter_reg(reg_list->reg[i])) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \ + if (check_supported_reg(vcpu, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for_each_reg_filtered(i) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + +#define for_each_present_blessed_reg(i) \ + for_each_reg(i) \ + if (find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + +static const char *config_name(struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int len = 0; + + if (c->name) + return c->name; + + for_each_sublist(c, s) + len += strlen(s->name) + 1; + + c->name = malloc(len); + + len = 0; + for_each_sublist(c, s) { + if (!strcmp(s->name, "base")) + continue; + if (len) + c->name[len++] = '+'; + strcpy(c->name + len, s->name); + len += strlen(s->name); + } + c->name[len] = '\0'; + + return c->name; +} + +bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) +{ + return true; +} + +bool __weak filter_reg(__u64 reg) +{ + return false; +} + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +void __weak print_reg(const char *prefix, __u64 id) +{ + printf("\t0x%llx,\n", id); +} + +bool __weak check_reject_set(int err) +{ + return true; +} + +void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ +} + +#ifdef __aarch64__ +static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c, + struct kvm_vcpu_init *init) +{ + struct vcpu_reg_sublist *s; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init); + + for_each_sublist(c, s) + if (s->capability) + init->features[s->feature / 32] |= 1 << (s->feature % 32); +} + +static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + + prepare_vcpu_init(vm, c, &init); + vcpu = __vm_vcpu_add(vm, 0); + aarch64_vcpu_setup(vcpu, &init); + + return vcpu; +} +#else +static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) +{ + return __vm_vcpu_add(vm, 0); +} +#endif + +static void check_supported(struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + + for_each_sublist(c, s) { + if (!s->capability) + continue; + + __TEST_REQUIRE(kvm_has_cap(s->capability), + "%s: %s not available, skipping tests", + config_name(c), s->name); + } +} + +static bool print_list; +static bool print_filtered; + +static void run_test(struct vcpu_reg_list *c) +{ + int new_regs = 0, missing_regs = 0, i, n; + int failed_get = 0, failed_set = 0, failed_reject = 0; + int skipped_set = 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct vcpu_reg_sublist *s; + + check_supported(c); + + vm = vm_create_barebones(); + vcpu = vcpu_config_get_vcpu(c, vm); + finalize_vcpu(vcpu, c); + + reg_list = vcpu_get_reg_list(vcpu); + + if (print_list || print_filtered) { + putchar('\n'); + for_each_reg(i) { + __u64 id = reg_list->reg[i]; + if ((print_list && !filter_reg(id)) || + (print_filtered && filter_reg(id))) + print_reg(config_name(c), id); + } + putchar('\n'); + return; + } + + for_each_sublist(c, s) + blessed_n += s->regs_n; + blessed_reg = calloc(blessed_n, sizeof(__u64)); + + n = 0; + for_each_sublist(c, s) { + for (i = 0; i < s->regs_n; ++i) + blessed_reg[n++] = s->regs[i]; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + * + * Only do the get/set tests on present, blessed list registers, + * since we don't know the capabilities of any new registers. + */ + for_each_present_blessed_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + bool reject_reg = false, skip_reg = false; + int ret; + + ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); + if (ret) { + printf("%s: Failed to get ", config_name(c)); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_get; + } + + for_each_sublist(c, s) { + /* rejects_set registers are rejected for set operation */ + if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { + reject_reg = true; + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + if (ret != -1 || !check_reject_set(errno)) { + printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_reject; + } + break; + } + + /* skips_set registers are skipped for set operation */ + if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) { + skip_reg = true; + ++skipped_set; + break; + } + } + + if (!reject_reg && !skip_reg) { + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + if (ret) { + printf("%s: Failed to set ", config_name(c)); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_set; + } + } + } + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + n = 0; + for_each_reg_filtered(i) + ++n; + + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); + printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", + config_name(c), reg_list->n, reg_list->n - n); + } + + if (new_regs) { + printf("\n%s: There are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", config_name(c), new_regs); + for_each_new_reg(i) + print_reg(config_name(c), reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\n%s: There are %d missing registers.\n" + "The following lines are missing registers:\n\n", config_name(c), missing_regs); + for_each_missing_reg(i) + print_reg(config_name(c), blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "%s: There are %d missing registers; %d registers failed get; " + "%d registers failed set; %d registers failed reject; %d registers skipped set", + config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set); + + pr_info("%s: PASS\n", config_name(c)); + blessed_n = 0; + free(blessed_reg); + free(reg_list); + kvm_vm_free(vm); +} + +static void help(void) +{ + struct vcpu_reg_list *c; + int i; + + printf( + "\n" + "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n" + " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" + " '<selection>' may be\n"); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + printf( + " '%s'\n", config_name(c)); + } + + printf( + "\n" + " --list Print the register list rather than test it (requires --config)\n" + " --list-filtered Print registers that would normally be filtered out (requires --config)\n" + "\n" + ); +} + +static struct vcpu_reg_list *parse_config(const char *config) +{ + struct vcpu_reg_list *c = NULL; + int i; + + if (config[8] != '=') + help(), exit(1); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (strcmp(config_name(c), &config[9]) == 0) + break; + } + + if (i == vcpu_configs_n) + help(), exit(1); + + return c; +} + +int main(int ac, char **av) +{ + struct vcpu_reg_list *c, *sel = NULL; + int i, ret = 0; + pid_t pid; + + for (i = 1; i < ac; ++i) { + if (strncmp(av[i], "--config", 8) == 0) + sel = parse_config(av[i]); + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; + else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) + help(), exit(0); + else + help(), exit(1); + } + + if (print_list || print_filtered) { + /* + * We only want to print the register list of a single config. + */ + if (!sel) + help(), exit(1); + } + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (sel && c != sel) + continue; + + pid = fork(); + + if (!pid) { + run_test(c); + exit(0); + } else { + int wstatus; + pid_t wpid = wait(&wstatus); + TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); + if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) + ret = KSFT_FAIL; + } + } + + return ret; +} diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c new file mode 100644 index 000000000000..618c937f3c90 --- /dev/null +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright Intel Corporation, 2023 + * + * Author: Chao Peng <chao.p.peng@linux.intel.com> + */ +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> + +#include <linux/bitmap.h> +#include <linux/falloc.h> +#include <linux/sizes.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "kvm_util.h" +#include "numaif.h" +#include "test_util.h" +#include "ucall_common.h" + +static size_t page_size; + +static void test_file_read_write(int fd, size_t total_size) +{ + char buf[64]; + + TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0, + "read on a guest_mem fd should fail"); + TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0, + "write on a guest_mem fd should fail"); + TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0, + "pread on a guest_mem fd should fail"); + TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0, + "pwrite on a guest_mem fd should fail"); +} + +static void test_mmap_cow(int fd, size_t size) +{ + void *mem; + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); +} + +static void test_mmap_supported(int fd, size_t total_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + memset(mem, val, total_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, + page_size); + TEST_ASSERT(!ret, "fallocate the first page should succeed."); + + for (i = 0; i < page_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00); + for (; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + memset(mem, val, page_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + kvm_munmap(mem, total_size); +} + +static void test_mbind(int fd, size_t total_size) +{ + const unsigned long nodemask_0 = 1; /* nid: 0 */ + unsigned long nodemask = 0; + unsigned long maxnode = 8; + int policy; + char *mem; + int ret; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + /* Test MPOL_INTERLEAVE policy */ + kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0, + "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_INTERLEAVE, nodemask_0, policy, nodemask); + + /* Test basic MPOL_BIND policy */ + kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0, + "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_BIND, nodemask_0, policy, nodemask); + + /* Test MPOL_DEFAULT policy */ + kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask, + "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx", + MPOL_DEFAULT, policy, nodemask); + + /* Test with invalid policy */ + ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "mbind with invalid policy should fail with EINVAL"); + + kvm_munmap(mem, total_size); +} + +static void test_numa_allocation(int fd, size_t total_size) +{ + unsigned long node0_mask = 1; /* Node 0 */ + unsigned long node1_mask = 2; /* Node 1 */ + unsigned long maxnode = 8; + void *pages[4]; + int status[4]; + char *mem; + int i; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + for (i = 0; i < 4; i++) + pages[i] = (char *)mem + page_size * i; + + /* Set NUMA policy after allocation */ + memset(mem, 0xaa, page_size); + kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size); + + /* Set NUMA policy before allocation */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + /* Validate if pages are allocated on specified NUMA nodes */ + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]); + + /* Punch hole for all pages */ + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size); + + /* Change NUMA policy nodes and reallocate */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]); + + kvm_munmap(mem, total_size); +} + +static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + + mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); + TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size])); + + for (i = 0; i < accessible_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + kvm_munmap(mem, map_size); +} + +static void test_fault_overflow(int fd, size_t total_size) +{ + test_fault_sigbus(fd, total_size, total_size * 4); +} + +static void test_fault_private(int fd, size_t total_size) +{ + test_fault_sigbus(fd, 0, total_size); +} + +static void test_mmap_not_supported(int fd, size_t total_size) +{ + char *mem; + + mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); +} + +static void test_file_size(int fd, size_t total_size) +{ + struct stat sb; + int ret; + + ret = fstat(fd, &sb); + TEST_ASSERT(!ret, "fstat should succeed"); + TEST_ASSERT_EQ(sb.st_size, total_size); + TEST_ASSERT_EQ(sb.st_blksize, page_size); +} + +static void test_fallocate(int fd, size_t total_size) +{ + int ret; + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size); + TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size - 1, page_size); + TEST_ASSERT(ret, "fallocate with unaligned offset should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size); + TEST_ASSERT(ret, "fallocate beginning at total_size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size); + TEST_ASSERT(ret, "fallocate beginning after total_size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + total_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + total_size + page_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size, page_size - 1); + TEST_ASSERT(ret, "fallocate with unaligned size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size); + TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); +} + +static void test_invalid_punch_hole(int fd, size_t total_size) +{ + struct { + off_t offset; + off_t len; + } testcases[] = { + {0, 1}, + {0, page_size - 1}, + {0, page_size + 1}, + + {1, 1}, + {1, page_size - 1}, + {1, page_size}, + {1, page_size + 1}, + + {page_size, 1}, + {page_size, page_size - 1}, + {page_size, page_size + 1}, + }; + int ret, i; + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + testcases[i].offset, testcases[i].len); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail", + testcases[i].offset, testcases[i].len); + } +} + +static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, + uint64_t guest_memfd_flags) +{ + size_t size; + int fd; + + for (size = 1; size < page_size; size++) { + fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags); + TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", + size); + } +} + +static void test_create_guest_memfd_multiple(struct kvm_vm *vm) +{ + int fd1, fd2, ret; + struct stat st1, st2; + + fd1 = __vm_create_guest_memfd(vm, page_size, 0); + TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size"); + + fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0); + TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); + + ret = fstat(fd2, &st2); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); + + close(fd2); + close(fd1); +} + +static void test_guest_memfd_flags(struct kvm_vm *vm) +{ + uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + uint64_t flag; + int fd; + + for (flag = BIT(0); flag; flag <<= 1) { + fd = __vm_create_guest_memfd(vm, page_size, flag); + if (flag & valid_flags) { + TEST_ASSERT(fd >= 0, + "guest_memfd() with flag '0x%lx' should succeed", + flag); + close(fd); + } else { + TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + } + } +} + +#define gmem_test(__test, __vm, __flags) \ +do { \ + int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \ + \ + test_##__test(fd, page_size * 4); \ + close(fd); \ +} while (0) + +static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) +{ + test_create_guest_memfd_multiple(vm); + test_create_guest_memfd_invalid_sizes(vm, flags); + + gmem_test(file_read_write, vm, flags); + + if (flags & GUEST_MEMFD_FLAG_MMAP) { + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); + gmem_test(numa_allocation, vm, flags); + } else { + gmem_test(fault_private, vm, flags); + } + + gmem_test(mmap_cow, vm, flags); + gmem_test(mbind, vm, flags); + } else { + gmem_test(mmap_not_supported, vm, flags); + } + + gmem_test(file_size, vm, flags); + gmem_test(fallocate, vm, flags); + gmem_test(invalid_punch_hole, vm, flags); +} + +static void test_guest_memfd(unsigned long vm_type) +{ + struct kvm_vm *vm = vm_create_barebones_type(vm_type); + uint64_t flags; + + test_guest_memfd_flags(vm); + + __test_guest_memfd(vm, 0); + + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (flags & GUEST_MEMFD_FLAG_MMAP) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP); + + /* MMAP should always be supported if INIT_SHARED is supported. */ + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + + kvm_vm_free(vm); +} + +static void guest_code(uint8_t *mem, uint64_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + __GUEST_ASSERT(mem[i] == 0xaa, + "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]); + + memset(mem, 0xff, size); + GUEST_DONE(); +} + +static void test_guest_memfd_guest(void) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables, and low memory contains + * the PCI hole and other MMIO regions that need to be avoided. + */ + const uint64_t gpa = SZ_4G; + const int slot = 1; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint8_t *mem; + size_t size; + int fd, i; + + if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS)) + return; + + vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); + + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, + "Default VM type should support MMAP, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED, + "Default VM type should support INIT_SHARED, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + + size = vm->page_size; + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); + + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + memset(mem, 0xaa, size); + kvm_munmap(mem, size); + + virt_pg_map(vm, gpa, gpa); + vcpu_args_set(vcpu, 2, gpa, size); + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + for (i = 0; i < size; i++) + TEST_ASSERT_EQ(mem[i], 0xff); + + close(fd); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + unsigned long vm_types, vm_type; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + page_size = getpagesize(); + + /* + * Not all architectures support KVM_CAP_VM_TYPES. However, those that + * support guest_memfd have that support for the default VM type. + */ + vm_types = kvm_check_cap(KVM_CAP_VM_TYPES); + if (!vm_types) + vm_types = BIT(VM_TYPE_DEFAULT); + + for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types)) + test_guest_memfd(vm_type); + + test_guest_memfd_guest(); +} diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c new file mode 100644 index 000000000000..bcf582852db9 --- /dev/null +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test for GUEST_PRINTF + * + * Copyright 2022, Google, Inc. and/or its affiliates. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "ucall_common.h" + +struct guest_vals { + uint64_t a; + uint64_t b; + uint64_t type; +}; + +static struct guest_vals vals; + +/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */ +#define TYPE_LIST \ +TYPE(test_type_i64, I64, "%ld", int64_t) \ +TYPE(test_type_u64, U64u, "%lu", uint64_t) \ +TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \ +TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \ +TYPE(test_type_u32, U32u, "%u", uint32_t) \ +TYPE(test_type_x32, U32x, "0x%x", uint32_t) \ +TYPE(test_type_X32, U32X, "0x%X", uint32_t) \ +TYPE(test_type_int, INT, "%d", int) \ +TYPE(test_type_char, CHAR, "%c", char) \ +TYPE(test_type_str, STR, "'%s'", const char *) \ +TYPE(test_type_ptr, PTR, "%p", uintptr_t) + +enum args_type { +#define TYPE(fn, ext, fmt_t, T) TYPE_##ext, + TYPE_LIST +#undef TYPE +}; + +static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, + const char *expected_assert); + +#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) \ +const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t; \ +const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead"; \ +static void fn(struct kvm_vcpu *vcpu, T a, T b) \ +{ \ + char expected_printf[UCALL_BUFFER_LEN]; \ + char expected_assert[UCALL_BUFFER_LEN]; \ + \ + snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \ + snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \ + vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \ + sync_global_to_guest(vcpu->vm, vals); \ + run_test(vcpu, expected_printf, expected_assert); \ +} + +#define TYPE(fn, ext, fmt_t, T) \ + BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) + TYPE_LIST +#undef TYPE + +static void guest_code(void) +{ + while (1) { + switch (vals.type) { +#define TYPE(fn, ext, fmt_t, T) \ + case TYPE_##ext: \ + GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b); \ + __GUEST_ASSERT(vals.a == vals.b, \ + ASSERT_FMT_##ext, vals.a, vals.b); \ + break; + TYPE_LIST +#undef TYPE + default: + GUEST_SYNC(vals.type); + } + + GUEST_DONE(); + } +} + +/* + * Unfortunately this gets a little messy because 'assert_msg' doesn't + * just contains the matching string, it also contains additional assert + * info. Fortunately the part that matches should be at the very end of + * 'assert_msg'. + */ +static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) +{ + int len_str = strlen(assert_msg); + int len_substr = strlen(expected_assert_msg); + int offset = len_str - len_substr; + + TEST_ASSERT(len_substr <= len_str, + "Expected '%s' to be a substring of '%s'", + assert_msg, expected_assert_msg); + + TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0, + "Unexpected mismatch. Expected: '%s', got: '%s'", + expected_assert_msg, &assert_msg[offset]); +} + +/* + * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional + * guest asserts guest can be verified instead of being reported as failures. + */ +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + int r; + + do { + r = __vcpu_run(vcpu); + } while (r == -1 && errno == EINTR); + + TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r)); +} + +static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, + const char *expected_assert) +{ + struct kvm_run *run = vcpu->run; + struct ucall uc; + + while (1) { + do_vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, + "Unexpected exit reason: %u (%s),", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]); + break; + case UCALL_PRINTF: + TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0, + "Unexpected mismatch. Expected: '%s', got: '%s'", + expected_printf, uc.buffer); + break; + case UCALL_ABORT: + ucall_abort(uc.buffer, expected_assert); + break; + case UCALL_DONE: + return; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} + +static void guest_code_limits(void) +{ + char test_str[UCALL_BUFFER_LEN + 10]; + + memset(test_str, 'a', sizeof(test_str)); + test_str[sizeof(test_str) - 1] = 0; + + GUEST_PRINTF("%s", test_str); +} + +static void test_limits(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); + run = vcpu->run; + do_vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, + "Unexpected exit reason: %u (%s),", + run->exit_reason, exit_reason_str(run->exit_reason)); + + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT, + "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)", + uc.cmd, UCALL_ABORT); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + test_type_i64(vcpu, -1, -1); + test_type_i64(vcpu, -1, 1); + test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + test_type_u32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_u32(vcpu, 0x90abcdef, 0x90abcdee); + test_type_x32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_x32(vcpu, 0x90abcdef, 0x90abcdee); + test_type_X32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_X32(vcpu, 0x90abcdef, 0x90abcdee); + + test_type_int(vcpu, -1, -1); + test_type_int(vcpu, -1, 1); + test_type_int(vcpu, 1, 1); + + test_type_char(vcpu, 'a', 'a'); + test_type_char(vcpu, 'a', 'A'); + test_type_char(vcpu, 'a', 'b'); + + test_type_str(vcpu, "foo", "foo"); + test_type_str(vcpu, "foo", "bar"); + + test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + kvm_vm_free(vm); + + test_limits(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index f5d59b9934f1..94bd6ed24cf3 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -4,9 +4,6 @@ * kvm_arch_hardware_disable is called and it attempts to unregister the user * return notifiers. */ - -#define _GNU_SOURCE - #include <fcntl.h> #include <pthread.h> #include <semaphore.h> @@ -23,7 +20,6 @@ #define SLEEPING_THREAD_NUM (1 << 4) #define FORK_NUM (1ULL << 9) #define DELAY_US_MAX 2000 -#define GUEST_CODE_PIO_PORT 4 sem_t *sem; @@ -41,7 +37,7 @@ static void *run_vcpu(void *arg) vcpu_run(vcpu); - TEST_ASSERT(false, "%s: exited with reason %d: %s\n", + TEST_ASSERT(false, "%s: exited with reason %d: %s", __func__, run->exit_reason, exit_reason_str(run->exit_reason)); pthread_exit(NULL); @@ -55,7 +51,7 @@ static void *sleeping_thread(void *arg) fd = open("/dev/null", O_RDWR); close(fd); } - TEST_ASSERT(false, "%s: exited\n", __func__); + TEST_ASSERT(false, "%s: exited", __func__); pthread_exit(NULL); } @@ -118,7 +114,7 @@ static void run_test(uint32_t run) for (i = 0; i < VCPU_NUM; ++i) check_join(threads[i], &b); /* Should not be reached */ - TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); + TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run); } void wait_for_child_setup(pid_t pid) diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h deleted file mode 100644 index ba0886e8a2bb..000000000000 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM Generic Interrupt Controller (GIC) v3 specific defines - */ - -#ifndef SELFTEST_KVM_GICV3_H -#define SELFTEST_KVM_GICV3_H - -#include <asm/sysreg.h> - -/* - * Distributor registers - */ -#define GICD_CTLR 0x0000 -#define GICD_TYPER 0x0004 -#define GICD_IGROUPR 0x0080 -#define GICD_ISENABLER 0x0100 -#define GICD_ICENABLER 0x0180 -#define GICD_ISPENDR 0x0200 -#define GICD_ICPENDR 0x0280 -#define GICD_ICACTIVER 0x0380 -#define GICD_ISACTIVER 0x0300 -#define GICD_IPRIORITYR 0x0400 -#define GICD_ICFGR 0x0C00 - -/* - * The assumption is that the guest runs in a non-secure mode. - * The following bits of GICD_CTLR are defined accordingly. - */ -#define GICD_CTLR_RWP (1U << 31) -#define GICD_CTLR_nASSGIreq (1U << 8) -#define GICD_CTLR_ARE_NS (1U << 4) -#define GICD_CTLR_ENABLE_G1A (1U << 1) -#define GICD_CTLR_ENABLE_G1 (1U << 0) - -#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) -#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0 - -/* - * Redistributor registers - */ -#define GICR_CTLR 0x000 -#define GICR_WAKER 0x014 - -#define GICR_CTLR_RWP (1U << 3) - -#define GICR_WAKER_ProcessorSleep (1U << 1) -#define GICR_WAKER_ChildrenAsleep (1U << 2) - -/* - * Redistributor registers, offsets from SGI base - */ -#define GICR_IGROUPR0 GICD_IGROUPR -#define GICR_ISENABLER0 GICD_ISENABLER -#define GICR_ICENABLER0 GICD_ICENABLER -#define GICR_ISPENDR0 GICD_ISPENDR -#define GICR_ISACTIVER0 GICD_ISACTIVER -#define GICR_ICACTIVER0 GICD_ICACTIVER -#define GICR_ICENABLER GICD_ICENABLER -#define GICR_ICACTIVER GICD_ICACTIVER -#define GICR_IPRIORITYR0 GICD_IPRIORITYR - -/* CPU interface registers */ -#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) -#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) -#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) - -#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0) - -#define ICC_PMR_DEF_PRIO 0xf0 - -#define ICC_SRE_EL1_SRE (1U << 0) - -#define ICC_IGRPEN1_EL1_ENABLE (1U << 0) - -#define GICV3_MAX_CPUS 512 - -#endif /* SELFTEST_KVM_GICV3_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index cb7c03de3a21..e2c4e9f0010f 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -41,7 +41,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntpct_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ @@ -58,7 +58,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) write_sysreg(cval, cntp_cval_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -72,14 +72,14 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntp_cval_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ return 0; } -static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) +static inline void timer_set_tval(enum arch_timer timer, int32_t tval) { switch (timer) { case VIRTUAL: @@ -89,12 +89,28 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) write_sysreg(tval, cntp_tval_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); } +static inline int32_t timer_get_tval(enum arch_timer timer) +{ + isb(); + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_tval_el0); + case PHYSICAL: + return read_sysreg(cntp_tval_el0); + default: + GUEST_FAIL("Could not get timer %d\n", timer); + } + + /* We should not reach here */ + return 0; +} + static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) { switch (timer) { @@ -105,7 +121,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) write_sysreg(ctl, cntp_ctl_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -119,7 +135,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntp_ctl_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ @@ -139,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) timer_set_tval(timer, msec_to_cycles(msec)); } +static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER : + KVM_ARM_VCPU_TIMER_IRQ_VTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + +static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER : + KVM_ARM_VCPU_TIMER_IRQ_PTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + #endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h index 329e4f5079ea..329e4f5079ea 100644 --- a/tools/testing/selftests/kvm/include/aarch64/delay.h +++ b/tools/testing/selftests/kvm/include/arm64/delay.h diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index b217ea17cac5..cc7a7f34ed37 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -6,11 +6,26 @@ #ifndef SELFTEST_KVM_GIC_H #define SELFTEST_KVM_GIC_H +#include <asm/kvm.h> + enum gic_type { GIC_V3, GIC_TYPE_MAX, }; +/* + * Note that the redistributor frames are at the end, as the range scales + * with the number of vCPUs in the VM. + */ +#define GITS_BASE_GPA 0x8000000ULL +#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE) +#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE) + +/* The GIC is identity-mapped into the guest at the time of setup. */ +#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA) +#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA) +#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA) + #define MIN_SGI 0 #define MIN_PPI 16 #define MIN_SPI 32 @@ -21,8 +36,7 @@ enum gic_type { #define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI) #define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI) -void gic_init(enum gic_type type, unsigned int nr_cpus, - void *dist_base, void *redist_base); +void gic_init(enum gic_type type, unsigned int nr_cpus); void gic_irq_enable(unsigned int intid); void gic_irq_disable(unsigned int intid); unsigned int gic_get_and_ack_irq(void); @@ -43,5 +57,9 @@ void gic_irq_set_pending(unsigned int intid); void gic_irq_clear_pending(unsigned int intid); bool gic_irq_get_pending(unsigned int intid); void gic_irq_set_config(unsigned int intid, bool is_edge); +void gic_irq_set_group(unsigned int intid, bool group); + +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table); #endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h new file mode 100644 index 000000000000..a76615fa39a1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ +#ifndef __SELFTESTS_GIC_V3_H +#define __SELFTESTS_GIC_V3_H + +/* + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. + */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_TYPER2 0x000C +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_IGROUPR 0x0080 +#define GICD_ISENABLER 0x0100 +#define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 +#define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 +#define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 +#define GICD_IGROUPRnE 0x1000 +#define GICD_ISENABLERnE 0x1200 +#define GICD_ICENABLERnE 0x1400 +#define GICD_ISPENDRnE 0x1600 +#define GICD_ICPENDRnE 0x1800 +#define GICD_ISACTIVERnE 0x1A00 +#define GICD_ICACTIVERnE 0x1C00 +#define GICD_IPRIORITYRnE 0x2000 +#define GICD_ICFGRnE 0x3000 +#define GICD_IROUTER 0x6000 +#define GICD_IROUTERnE 0x8000 +#define GICD_IDREGS 0xFFD0 +#define GICD_PIDR2 0xFFE8 + +#define ESPI_BASE_INTID 4096 + +/* + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). + */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + +#define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_nASSGIreq (1U << 8) +#define GICD_CTLR_DS (1U << 6) +#define GICD_CTLR_ARE_NS (1U << 4) +#define GICD_CTLR_ENABLE_G1A (1U << 1) +#define GICD_CTLR_ENABLE_G1 (1U << 0) + +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_RSS (1U << 26) +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) +#define GICD_TYPER_ESPI (1U << 8) + +#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) +#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) +#define GICD_TYPER_ESPIS(typer) \ + (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) + +#define GICD_TYPER2_nASSGIcap (1U << 8) +#define GICD_TYPER2_VIL (1U << 7) +#define GICD_TYPER2_VID GENMASK(4, 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +#define GIC_V3_DIST_SIZE 0x10000 + +#define GIC_PAGE_SIZE_4K 0ULL +#define GIC_PAGE_SIZE_16K 1ULL +#define GIC_PAGE_SIZE_64K 2ULL +#define GIC_PAGE_SIZE_MASK 3ULL + +/* + * Re-Distributor registers, offsets from RD_base + */ +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_IDREGS GICD_IDREGS +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_CES (1UL << 1) +#define GICR_CTLR_IR (1UL << 2) +#define GICR_CTLR_RWP (1UL << 3) + +#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) + +#define EPPI_BASE_INTID 1056 + +#define GICR_TYPER_NR_PPIS(r) \ + ({ \ + unsigned int __ppinum = ((r) >> 27) & 0x1f; \ + unsigned int __nr_ppis = 16; \ + if (__ppinum == 1 || __ppinum == 2) \ + __nr_ppis += __ppinum * 32; \ + \ + __nr_ppis; \ + }) + +#define GICR_WAKER_ProcessorSleep (1U << 1) +#define GICR_WAKER_ChildrenAsleep (1U << 2) + +#define GIC_BASER_CACHE_nCnB 0ULL +#define GIC_BASER_CACHE_SameAsInner 0ULL +#define GIC_BASER_CACHE_nC 1ULL +#define GIC_BASER_CACHE_RaWt 2ULL +#define GIC_BASER_CACHE_RaWb 3ULL +#define GIC_BASER_CACHE_WaWt 4ULL +#define GIC_BASER_CACHE_WaWb 5ULL +#define GIC_BASER_CACHE_RaWaWt 6ULL +#define GIC_BASER_CACHE_RaWaWb 7ULL +#define GIC_BASER_CACHE_MASK 7ULL +#define GIC_BASER_NonShareable 0ULL +#define GIC_BASER_InnerShareable 1ULL +#define GIC_BASER_OuterShareable 2ULL +#define GIC_BASER_SHAREABILITY_MASK 3ULL + +#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ + (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) + +#define GIC_BASER_SHAREABILITY(reg, type) \ + (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) + +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + +#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) +#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) +#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) +#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_PROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) + +#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) +#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) +#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) +#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) +#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) +#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) +#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) + +#define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) + +#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) +#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) +#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) +#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_PENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) + +#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) +#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) +#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) +#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) +#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) +#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) +#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) + +#define GICR_PENDBASER_PTZ BIT_ULL(62) + +/* + * Re-Distributor registers, offsets from SGI_base + */ +#define GICR_IGROUPR0 GICD_IGROUPR +#define GICR_ISENABLER0 GICD_ISENABLER +#define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER +#define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR +#define GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR + +#define GICR_TYPER_PLPIS (1U << 0) +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) +#define GICR_TYPER_DirectLPIS (1U << 3) +#define GICR_TYPER_LAST (1U << 4) +#define GICR_TYPER_RVPEID (1U << 7) +#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24) +#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32) + +#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0) +#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32) +#define GICR_INVLPIR_V GENMASK_ULL(63, 63) + +#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID +#define GICR_INVALLR_V GICR_INVLPIR_V + +#define GIC_V3_REDIST_SIZE 0x20000 + +#define LPI_PROP_GROUP1 (1 << 1) +#define LPI_PROP_ENABLED (1 << 0) + +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +/* + * GICv4.1 VPROPBASER reinvention. A subtle mix between the old + * VPROPBASER and ITS_BASER. Just not quite any of the two. + */ +#define GICR_VPROPBASER_4_1_VALID (1ULL << 63) +#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59) +#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55) +#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53) +#define GICR_VPROPBASER_4_1_Z (1ULL << 52) +#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12) +#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + +/* + * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields, + * also use the above Valid, PendingLast and Dirty. + */ +#define GICR_VPENDBASER_4_1_DB (1ULL << 62) +#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59) +#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) +#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) + +#define GICR_VSGIR 0x0080 + +#define GICR_VSGIR_VPEID GENMASK(15, 0) + +#define GICR_VSGIPENDR 0x0088 + +#define GICR_VSGIPENDR_BUSY (1U << 31) +#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) + +/* + * ITS registers, offsets from ITS_base + */ +#define GITS_CTLR 0x0000 +#define GITS_IIDR 0x0004 +#define GITS_TYPER 0x0008 +#define GITS_MPIDR 0x0018 +#define GITS_CBASER 0x0080 +#define GITS_CWRITER 0x0088 +#define GITS_CREADR 0x0090 +#define GITS_BASER 0x0100 +#define GITS_IDREGS_BASE 0xffd0 +#define GITS_PIDR0 0xffe0 +#define GITS_PIDR1 0xffe4 +#define GITS_PIDR2 GICR_PIDR2 +#define GITS_PIDR4 0xffd0 +#define GITS_CIDR0 0xfff0 +#define GITS_CIDR1 0xfff4 +#define GITS_CIDR2 0xfff8 +#define GITS_CIDR3 0xfffc + +#define GITS_TRANSLATER 0x10040 + +#define GITS_SGIR 0x20020 + +#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) +#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) + +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS_SHIFT 8 +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) +#define GITS_TYPER_PTA (1UL << 19) +#define GITS_TYPER_HCC_SHIFT 24 +#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) +#define GITS_TYPER_VMOVP (1ULL << 37) +#define GITS_TYPER_VMAPP (1ULL << 40) +#define GITS_TYPER_SVPET GENMASK_ULL(42, 41) + +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 + +#define GITS_CBASER_VALID (1ULL << 63) +#define GITS_CBASER_SHAREABILITY_SHIFT (10) +#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_CBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) +#define GITS_CBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) +#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) +#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK + +#define GITS_CBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) + +#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) +#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) +#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) +#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) +#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) +#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) +#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) + +#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) + +#define GITS_BASER_NR_REGS 8 + +#define GITS_BASER_VALID (1ULL << 63) +#define GITS_BASER_INDIRECT (1ULL << 62) + +#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_BASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) +#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK +#define GITS_BASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) +#define GITS_BASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) + +#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) +#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) +#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) +#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) +#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) +#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) +#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) + +#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) +#define GITS_BASER_ADDR_48_to_52(baser) \ + (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) + +#define GITS_BASER_SHAREABILITY_SHIFT (10) +#define GITS_BASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) +#define GITS_BASER_PAGE_SIZE_SHIFT (8) +#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K) +#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K) +#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K) +#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK) +#define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_PAGES_SHIFT (0) +#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) + +#define GITS_BASER_TYPE_NONE 0 +#define GITS_BASER_TYPE_DEVICE 1 +#define GITS_BASER_TYPE_VCPU 2 +#define GITS_BASER_TYPE_RESERVED3 3 +#define GITS_BASER_TYPE_COLLECTION 4 +#define GITS_BASER_TYPE_RESERVED5 5 +#define GITS_BASER_TYPE_RESERVED6 6 +#define GITS_BASER_TYPE_RESERVED7 7 + +#define GITS_LVL1_ENTRY_SIZE (8UL) + +/* + * ITS commands + */ +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPTI 0x0a +#define GITS_CMD_MAPI 0x0b +#define GITS_CMD_MOVI 0x01 +#define GITS_CMD_DISCARD 0x0f +#define GITS_CMD_INV 0x0c +#define GITS_CMD_MOVALL 0x0e +#define GITS_CMD_INVALL 0x0d +#define GITS_CMD_INT 0x03 +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_SYNC 0x05 + +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) +#define GITS_CMD_VSGI GITS_CMD_GICv4(3) +#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) + +/* + * ITS error numbers + */ +#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 +#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 +#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 +#define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 +#define E_ITS_MAPC_PROCNUM_OOR 0x010902 +#define E_ITS_MAPC_COLLECTION_OOR 0x010903 +#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 +#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 +#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 +#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 +#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 +#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_CTLR_EL1_RSS (0x1 << 18) +#define ICC_CTLR_EL1_ExtRange (0x1 << 19) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) +#define ICC_SRE_EL1_SRE (1U << 0) + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) + +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_RS_SHIFT 44 +#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) + +#endif diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h new file mode 100644 index 000000000000..58feef3eb386 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTESTS_GIC_V3_ITS_H__ +#define __SELFTESTS_GIC_V3_ITS_H__ + +#include <linux/sizes.h> + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size); + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid); +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid); +void its_send_invall_cmd(void *cmdq_base, u32 collection_id); +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id); + +#endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h new file mode 100644 index 000000000000..b973bb2c64a6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch { + bool has_gic; + int gic_fd; +}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index cb537253a6b9..ff928716574d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -8,8 +8,12 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include "ucall_common.h" + #include <linux/stringify.h> #include <linux/types.h> +#include <asm/brk-imm.h> +#include <asm/esr.h> #include <asm/sysreg.h> @@ -58,7 +62,66 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) -#define MPIDR_HWID_BITMASK (0xff00fffffful) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, @@ -100,26 +163,8 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -#define ESR_EC_NUM 64 -#define ESR_EC_SHIFT 26 -#define ESR_EC_MASK (ESR_EC_NUM - 1) - -#define ESR_EC_SVC64 0x15 -#define ESR_EC_IABT 0x21 -#define ESR_EC_DABT 0x25 -#define ESR_EC_HW_BP_CURRENT 0x31 -#define ESR_EC_SSTEP_CURRENT 0x33 -#define ESR_EC_WP_CURRENT 0x35 -#define ESR_EC_BRK_INS 0x3c - -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k); +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k); void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); @@ -130,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); static inline void cpu_relax(void) @@ -176,11 +222,28 @@ static __always_inline u32 __raw_readl(const volatile void *addr) return val; } +static __always_inline void __raw_writeq(u64 val, volatile void *addr) +{ + asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr)); +} + +static __always_inline u64 __raw_readq(const volatile void *addr) +{ + u64 val; + asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + #define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) #define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) +#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) +#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) #define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));}) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) +#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));}) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) + static inline void local_irq_enable(void) { @@ -192,6 +255,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 @@ -225,8 +298,90 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +/* Execute a Wait For Interrupt instruction. */ +void wfi(void); + +void test_wants_mte(void); +void test_disable_default_vgic(void); + +bool vm_supports_el2(struct kvm_vm *vm); + +static inline bool test_supports_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + bool supported = vm_supports_el2(vm); + + kvm_vm_free(vm); + return supported; +} +static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu) +{ + return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + +#define MAPPED_EL2_SYSREG(el2, el1) \ + case SYS_##el1: \ + if (vcpu_has_el2(vcpu)) \ + alias = SYS_##el2; \ + break + + +static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding) +{ + u32 alias = encoding; + + BUILD_BUG_ON(!__builtin_constant_p(encoding)); + + switch (encoding) { + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1); + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1); + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1); + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1); + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1); + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1); + MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1); + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1); + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1); + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1); + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1); + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1); + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1); + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1); + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1); + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1); + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1); + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1); + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1); + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1); + MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1); + case SYS_SP_EL1: + if (!vcpu_has_el2(vcpu)) + return ARM64_CORE_REG(sp_el1); + + alias = SYS_SP_EL2; + break; + default: + BUILD_BUG(); + } + + return KVM_ARM64_SYS_REG(alias); +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init); + +static inline unsigned int get_current_el(void) +{ + return (read_sysreg(CurrentEL) >> 2) & 0x3; +} -uint32_t guest_get_vcpuid(void); +#define do_smccc(...) \ +do { \ + if (get_current_el() == 2) \ + smccc_smc(__VA_ARGS__); \ + else \ + smccc_hvc(__VA_ARGS__); \ +} while (0) #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h index cf0984106d14..cf0984106d14 100644 --- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h +++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h new file mode 100644 index 000000000000..4ec801f37f00 --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index 0ac6f05c63f9..688beccc9436 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -16,8 +16,10 @@ ((uint64_t)(flags) << 12) | \ index) -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, - uint64_t gicd_base_gpa, uint64_t gicr_base_gpa); +bool kvm_supports_vgic_v3(void); +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +void __vgic_v3_init(int fd); +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); #define VGIC_MAX_RESERVED 1023 @@ -33,4 +35,6 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +int vgic_its_setup(struct kvm_vm *vm); + #endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h index b691df33e64e..63f5167397cc 100644 --- a/tools/testing/selftests/kvm/include/guest_modes.h +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -11,8 +11,8 @@ struct guest_mode { extern struct guest_mode guest_modes[NUM_VM_MODES]; -#define guest_mode_append(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +#define guest_mode_append(mode, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ (enabled), (enabled) }; \ }) void guest_modes_append_default(void); diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h new file mode 100644 index 000000000000..d4e613162bba --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_SYSCALLS_H +#define SELFTEST_KVM_SYSCALLS_H + +#include <sys/syscall.h> + +#define MAP_ARGS0(m,...) +#define MAP_ARGS1(m,t,a,...) m(t,a) +#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__) +#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__) +#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__) +#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__) +#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__) +#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__) + +#define __DECLARE_ARGS(t, a) t a +#define __UNPACK_ARGS(t, a) a + +#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args) +#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args) + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +/* Define a kvm_<syscall>() API to assert success. */ +#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \ +{ \ + int r; \ + \ + r = name(UNPACK_ARGS(nr_args, args)); \ + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \ +} + +/* + * Macro to define syscall APIs, either because KVM selftests doesn't link to + * the standard library, e.g. libnuma, or because there is no library that yet + * provides the syscall. These + */ +#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline long name(DECLARE_ARGS(nr_args, args)) \ +{ \ + return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \ +} \ +__KVM_SYSCALL_DEFINE(name, nr_args, args) + +/* + * Special case mmap(), as KVM selftest rarely/never specific an address, + * rarely specify an offset, and because the unique return code requires + * special handling anyways. + */ +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline int kvm_dup(int fd) +{ + int new_fd = dup(fd); + + TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd)); + return new_fd; +} + +__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size); +__KVM_SYSCALL_DEFINE(close, 1, int, fd); +__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len); +__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length); + +#endif /* SELFTEST_KVM_SYSCALLS_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_test_harness.h b/tools/testing/selftests/kvm/include/kvm_test_harness.h new file mode 100644 index 000000000000..8f7c6858e8e2 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_test_harness.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Macros for defining a KVM test + * + * Copyright (C) 2022, Google LLC. + */ + +#ifndef SELFTEST_KVM_TEST_HARNESS_H +#define SELFTEST_KVM_TEST_HARNESS_H + +#include "kselftest_harness.h" + +#define KVM_ONE_VCPU_TEST_SUITE(name) \ + FIXTURE(name) { \ + struct kvm_vcpu *vcpu; \ + }; \ + \ + FIXTURE_SETUP(name) { \ + (void)vm_create_with_one_vcpu(&self->vcpu, NULL); \ + } \ + \ + FIXTURE_TEARDOWN(name) { \ + kvm_vm_free(self->vcpu->vm); \ + } + +#define KVM_ONE_VCPU_TEST(suite, test, guestcode) \ +static void __suite##_##test(struct kvm_vcpu *vcpu); \ + \ +TEST_F(suite, test) \ +{ \ + vcpu_arch_set_entry_point(self->vcpu, guestcode); \ + __suite##_##test(self->vcpu); \ +} \ +static void __suite##_##test(struct kvm_vcpu *vcpu) + +#endif /* SELFTEST_KVM_TEST_HARNESS_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index c9286811a4cb..81f4355ff28a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1,13 +1,1278 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UTIL_H #define SELFTEST_KVM_UTIL_H -#include "kvm_util_base.h" -#include "ucall_common.h" +#include "test_util.h" + +#include <linux/compiler.h> +#include "linux/hashtable.h" +#include "linux/list.h" +#include <linux/kernel.h> +#include <linux/kvm.h> +#include "linux/rbtree.h" +#include <linux/types.h> + +#include <asm/atomic.h> +#include <asm/kvm.h> + +#include <sys/eventfd.h> +#include <sys/ioctl.h> + +#include <pthread.h> + +#include "kvm_syscalls.h" +#include "kvm_util_arch.h" +#include "kvm_util_types.h" +#include "sparsebit.h" + +#define KVM_DEV_PATH "/dev/kvm" +#define KVM_MAX_VCPUS 512 + +#define NSEC_PER_SEC 1000000000L + +struct userspace_mem_region { + struct kvm_userspace_memory_region2 region; + struct sparsebit *unused_phy_pages; + struct sparsebit *protected_phy_pages; + int fd; + off_t offset; + enum vm_mem_backing_src_type backing_src_type; + void *host_mem; + void *host_alias; + void *mmap_start; + void *mmap_alias; + size_t mmap_size; + struct rb_node gpa_node; + struct rb_node hva_node; + struct hlist_node slot_node; +}; + +struct kvm_binary_stats { + int fd; + struct kvm_stats_header header; + struct kvm_stats_desc *desc; +}; + +struct kvm_vcpu { + struct list_head list; + uint32_t id; + int fd; + struct kvm_vm *vm; + struct kvm_run *run; +#ifdef __x86_64__ + struct kvm_cpuid2 *cpuid; +#endif +#ifdef __aarch64__ + struct kvm_vcpu_init init; +#endif + struct kvm_binary_stats stats; + struct kvm_dirty_gfn *dirty_gfns; + uint32_t fetch_index; + uint32_t dirty_gfns_count; +}; + +struct userspace_mem_regions { + struct rb_root gpa_tree; + struct rb_root hva_tree; + DECLARE_HASHTABLE(slot_hash, 9); +}; + +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + +struct kvm_vm { + int mode; + unsigned long type; + int kvm_fd; + int fd; + unsigned int pgtable_levels; + unsigned int page_size; + unsigned int page_shift; + unsigned int pa_bits; + unsigned int va_bits; + uint64_t max_gfn; + struct list_head vcpus; + struct userspace_mem_regions regions; + struct sparsebit *vpages_valid; + struct sparsebit *vpages_mapped; + bool has_irqchip; + bool pgd_created; + vm_paddr_t ucall_mmio_addr; + vm_paddr_t pgd; + vm_vaddr_t handlers; + uint32_t dirty_ring_size; + uint64_t gpa_tag_mask; + + struct kvm_vm_arch arch; + + struct kvm_binary_stats stats; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; +}; + +struct vcpu_reg_sublist { + const char *name; + long capability; + int feature; + int feature_type; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; + __u64 *skips_set; + __u64 skips_set_n; +}; + +struct vcpu_reg_list { + char *name; + struct vcpu_reg_sublist sublists[]; +}; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) + +#define kvm_for_each_vcpu(vm, i, vcpu) \ + for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ + if (!((vcpu) = vm->vcpus[i])) \ + continue; \ + else + +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_P52V48_4K, + VM_MODE_P52V48_16K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_16K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_16K, + VM_MODE_P40V48_64K, + VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */ + VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, + VM_MODE_P36V48_4K, + VM_MODE_P36V48_16K, + VM_MODE_P36V48_64K, + VM_MODE_P47V47_16K, + VM_MODE_P36V47_16K, + NUM_VM_MODES, +}; + +struct vm_shape { + uint32_t type; + uint8_t mode; + uint8_t pad0; + uint16_t pad1; +}; + +kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); + +#define VM_TYPE_DEFAULT 0 + +#define VM_SHAPE(__mode) \ +({ \ + struct vm_shape shape = { \ + .mode = (__mode), \ + .type = VM_TYPE_DEFAULT \ + }; \ + \ + shape; \ +}) + +#if defined(__aarch64__) + +extern enum vm_guest_mode vm_mode_default; + +#define VM_MODE_DEFAULT vm_mode_default +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__x86_64__) + +#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + +#elif defined(__riscv) + +#if __riscv_xlen == 32 +#error "RISC-V 32-bit kvm selftests not supported" +#endif + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__loongarch__) +#define VM_MODE_DEFAULT VM_MODE_P47V47_16K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#endif + +#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) + +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); +int open_path_or_exit(const char *path, int flags); +int open_kvm_dev_path_or_exit(void); + +int kvm_get_module_param_integer(const char *module_name, const char *param); +bool kvm_get_module_param_bool(const char *module_name, const char *param); + +static inline bool get_kvm_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm", param); +} + +static inline int get_kvm_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm", param); +} + +unsigned int kvm_check_cap(long cap); + +static inline bool kvm_has_cap(long cap) +{ + return kvm_check_cap(cap); +} + +/* + * Use the "inner", double-underscore macro when reporting errors from within + * other macros so that the name of ioctl() and not its literal numeric value + * is printed on error. The "outer" macro is strongly preferred when reporting + * errors "directly", i.e. without an additional layer of macros, as it reduces + * the probability of passing in the wrong string. + */ +#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) +#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) + +#define kvm_do_ioctl(fd, cmd, arg) \ +({ \ + kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ + ioctl(fd, cmd, arg); \ +}) + +#define __kvm_ioctl(kvm_fd, cmd, arg) \ + kvm_do_ioctl(kvm_fd, cmd, arg) + +#define kvm_ioctl(kvm_fd, cmd, arg) \ +({ \ + int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ + \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ +}) + +static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } + +#define __vm_ioctl(vm, cmd, arg) \ +({ \ + static_assert_is_vm(vm); \ + kvm_do_ioctl((vm)->fd, cmd, arg); \ +}) + +/* + * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if + * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, + * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before + * selftests existed and (b) should never outright fail, i.e. is supposed to + * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the + * VM and its vCPUs, including KVM_CHECK_EXTENSION. + */ +#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ +do { \ + int __errno = errno; \ + \ + static_assert_is_vm(vm); \ + \ + if (cond) \ + break; \ + \ + if (errno == EIO && \ + __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ + TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ + TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ + } \ + errno = __errno; \ + TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ +} while (0) + +#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ + __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) + +#define vm_ioctl(vm, cmd, arg) \ +({ \ + int ret = __vm_ioctl(vm, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ +}) + +static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } + +#define __vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + static_assert_is_vcpu(vcpu); \ + kvm_do_ioctl((vcpu)->fd, cmd, arg); \ +}) + +#define vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + int ret = __vcpu_ioctl(vcpu, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ +}) + +/* + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +static inline int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); + + TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); + return ret; +} + +static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} +static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, + uint64_t size, uint64_t attributes) +{ + struct kvm_memory_attributes attr = { + .attributes = attributes, + .address = gpa, + .size = size, + .flags = 0, + }; + + /* + * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows + * need significant enhancements to support multiple attributes. + */ + TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, + "Update me to support multiple attributes!"); + + vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); +} + + +static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, 0); +} + +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, + bool punch_hole); + +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, true); +} + +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, false); +} + +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); + +void kvm_vm_free(struct kvm_vm *vmp); +void kvm_vm_restart(struct kvm_vm *vmp); +void kvm_vm_release(struct kvm_vm *vmp); +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); +int kvm_memfd_alloc(size_t size, bool hugepages); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) +{ + struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; + + vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); +} + +static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages) +{ + struct kvm_clear_dirty_log args = { + .dirty_bitmap = log, + .slot = slot, + .first_page = first_page, + .num_pages = num_pages + }; + + vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); +} + +static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +{ + return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); +} + +static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone); +} + +static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone); +} + +static inline int vm_get_stats_fd(struct kvm_vm *vm) +{ + int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); + return fd; +} + +static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + +static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) +{ + ssize_t ret; + + ret = pread(stats_fd, header, sizeof(*header), 0); + TEST_ASSERT(ret == sizeof(*header), + "Failed to read '%lu' header bytes, ret = '%ld'", + sizeof(*header), ret); +} + +struct kvm_stats_desc *read_stats_descriptors(int stats_fd, + struct kvm_stats_header *header); + +static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) +{ + /* + * The base size of the descriptor is defined by KVM's ABI, but the + * size of the name field is variable, as far as KVM's ABI is + * concerned. For a given instance of KVM, the name field is the same + * size for all stats and is provided in the overall stats header. + */ + return sizeof(struct kvm_stats_desc) + header->name_size; +} + +static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, + int index, + struct kvm_stats_header *header) +{ + /* + * Note, size_desc includes the size of the name field, which is + * variable. i.e. this is NOT equivalent to &stats_desc[i]. + */ + return (void *)stats + index * get_stats_descriptor_size(header); +} + +void read_stat_data(int stats_fd, struct kvm_stats_header *header, + struct kvm_stats_desc *desc, uint64_t *data, + size_t max_elements); + +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements); + +#define __get_stat(stats, stat) \ +({ \ + uint64_t data; \ + \ + kvm_get_stat(stats, #stat, &data, 1); \ + data; \ +}) + +#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat) +#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat) + +static inline bool read_smt_control(char *buf, size_t buf_size) +{ + FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r"); + bool ret; + + if (!f) + return false; + + ret = fread(buf, sizeof(*buf), buf_size, f) > 0; + fclose(f); + + return ret; +} + +static inline bool is_smt_possible(void) +{ + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && + (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12))) + return false; + + return true; +} + +static inline bool is_smt_on(void) +{ + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2)) + return true; + + return false; +} + +void vm_create_irqchip(struct kvm_vm *vm); + +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + +void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t gpa, uint32_t slot, uint64_t npages, + uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, + int guest_memfd_fd, uint64_t guest_memfd_offset); + +#ifndef vm_arch_has_protected_memory +static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) +{ + return false; +} +#endif + +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_populate_vaddr_bitmap(struct kvm_vm *vm); +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); + +void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + unsigned int npages); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); + +#ifndef vcpu_arch_put_guest +#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0) +#endif + +static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) +{ + return gpa & ~vm->gpa_tag_mask; +} + +void vcpu_run(struct kvm_vcpu *vcpu); +int _vcpu_run(struct kvm_vcpu *vcpu); + +static inline int __vcpu_run(struct kvm_vcpu *vcpu) +{ + return __vcpu_ioctl(vcpu, KVM_RUN, NULL); +} + +void vcpu_run_complete_io(struct kvm_vcpu *vcpu); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); + +static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, + uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *debug) +{ + vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); +} + +static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); +} +static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); +} + +static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_GET_REGS, regs); +} + +static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_SET_REGS, regs); +} +static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); + +} +static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); +} +static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); +} + +static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} +static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) +{ + uint64_t val; + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + + vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); + return val; +} +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); + + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} + +#ifdef __KVM_HAVE_VCPU_EVENTS +static inline void vcpu_events_get(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); +} +static inline void vcpu_events_set(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); +} +#endif +#ifdef __x86_64__ +static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); +} +static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} + +static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} +#endif +static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) +{ + int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); + return fd; +} + +int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); + +static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + int ret = __kvm_has_device_attr(dev_fd, group, attr); + + TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); +} + +int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_get(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_get(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); +} + +int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_set(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_set(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); +} + +static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + return __kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); +int __kvm_create_device(struct kvm_vm *vm, uint64_t type); + +static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) +{ + int fd = __kvm_create_device(vm, type); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); + return fd; +} + +void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); + +/* + * VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first @num input parameters for the function at @vcpu's entry point, + * per the C calling convention of the architecture, to the values given as + * variable args. Each of the variable args is expected to be of type uint64_t. + * The maximum @num can be is specific to the architecture. + */ +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); + +void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); +int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); + +#define KVM_MAX_IRQ_ROUTES 4096 + +struct kvm_irq_routing *kvm_gsi_routing_create(void); +void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, + uint32_t gsi, uint32_t pin); +int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); +void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); + +const char *exit_reason_str(unsigned int exit_reason); + +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); +vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot, + bool protected); +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); + +static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot) +{ + /* + * By default, allocate memory as protected for VMs that support + * protected memory, as the majority of memory for such VMs is + * protected, i.e. using shared memory is effectively opt-in. + */ + return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, + vm_arch_has_protected_memory(vm)); +} + +/* + * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also + * loads the test binary into guest memory and creates an IRQ chip (x86 only). + * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to + * calculate the amount of memory needed for per-vCPU data, e.g. stacks. + */ +struct kvm_vm *____vm_create(struct vm_shape shape); +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, + uint64_t nr_extra_pages); + +static inline struct kvm_vm *vm_create_barebones(void) +{ + return ____vm_create(VM_SHAPE_DEFAULT); +} + +static inline struct kvm_vm *vm_create_barebones_type(unsigned long type) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = type, + }; + + return ____vm_create(shape); +} + +static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) +{ + return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); +} + +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, + uint64_t extra_mem_pages, + void *guest_code, struct kvm_vcpu *vcpus[]); + +static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, + void *guest_code, + struct kvm_vcpu *vcpus[]) +{ + return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, + guest_code, vcpus); +} + + +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code); + +/* + * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages + * additional pages of guest memory. Returns the VM and vCPU (via out param). + */ +static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, + extra_mem_pages, guest_code); +} + +static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_with_one_vcpu(vcpu, 0, guest_code); +} + +static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); +} + +struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); + +void kvm_set_files_rlimit(uint32_t nr_vcpus); + +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + +void kvm_print_vcpu_pinning_help(void); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); +unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); +static inline unsigned int +vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + unsigned int n; + n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +#ifdef __s390x__ + /* s390 requires 1M aligned guest sizes */ + n = (n + 255) & ~255; +#endif + return n; +} + +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +/* + * Write a global value, but only in the VM's (guest's) domain. Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. + */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent); + +static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent) +{ + vcpu_arch_dump(stream, vcpu, indent); +} + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - The id of the VCPU to add to the VM. + */ +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); + +static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) +{ + struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); + + vcpu_arch_set_entry_point(vcpu, guest_code); + + return vcpu; +} + +/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); + +static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, + uint32_t vcpu_id) +{ + return vm_arch_vcpu_recreate(vm, vcpu_id); +} + +void vcpu_arch_free(struct kvm_vcpu *vcpu); + +void virt_arch_pgd_alloc(struct kvm_vm *vm); + +static inline void virt_pgd_alloc(struct kvm_vm *vm) +{ + virt_arch_pgd_alloc(vm); +} + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); + +static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + virt_arch_pg_map(vm, vaddr, paddr); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); +} + + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return addr_arch_gva2gpa(vm, gva); +} + +/* + * Virtual Translation Tables Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps to the FILE stream given by @stream, the contents of all the + * virtual translation tables for the VM given by @vm. + */ +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + virt_arch_dump(stream, vm, indent); +} + + +static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) +{ + return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); +} + +/* + * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * to allow for arch-specific setup that is common to all tests, e.g. computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); +void kvm_arch_vm_release(struct kvm_vm *vm); + +bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); + +uint32_t guest_get_vcpuid(void); + +bool kvm_arch_has_default_irqchip(void); #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h deleted file mode 100644 index eb1ff597bcca..000000000000 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ /dev/null @@ -1,914 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/kvm_util_base.h - * - * Copyright (C) 2018, Google LLC. - */ -#ifndef SELFTEST_KVM_UTIL_BASE_H -#define SELFTEST_KVM_UTIL_BASE_H - -#include "test_util.h" - -#include <linux/compiler.h> -#include "linux/hashtable.h" -#include "linux/list.h" -#include <linux/kernel.h> -#include <linux/kvm.h> -#include "linux/rbtree.h" - -#include <asm/atomic.h> - -#include <sys/ioctl.h> - -#include "sparsebit.h" - -/* - * Provide a version of static_assert() that is guaranteed to have an optional - * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h) - * #undefs and #defines static_assert() as a direct alias to _Static_assert(), - * i.e. effectively makes the message mandatory. Many KVM selftests #define - * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As - * a result, static_assert() behavior is non-deterministic and may or may not - * require a message depending on #include order. - */ -#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) -#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) - -#define KVM_DEV_PATH "/dev/kvm" -#define KVM_MAX_VCPUS 512 - -#define NSEC_PER_SEC 1000000000L - -typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ - -struct userspace_mem_region { - struct kvm_userspace_memory_region region; - struct sparsebit *unused_phy_pages; - int fd; - off_t offset; - enum vm_mem_backing_src_type backing_src_type; - void *host_mem; - void *host_alias; - void *mmap_start; - void *mmap_alias; - size_t mmap_size; - struct rb_node gpa_node; - struct rb_node hva_node; - struct hlist_node slot_node; -}; - -struct kvm_vcpu { - struct list_head list; - uint32_t id; - int fd; - struct kvm_vm *vm; - struct kvm_run *run; -#ifdef __x86_64__ - struct kvm_cpuid2 *cpuid; -#endif - struct kvm_dirty_gfn *dirty_gfns; - uint32_t fetch_index; - uint32_t dirty_gfns_count; -}; - -struct userspace_mem_regions { - struct rb_root gpa_tree; - struct rb_root hva_tree; - DECLARE_HASHTABLE(slot_hash, 9); -}; - -enum kvm_mem_region_type { - MEM_REGION_CODE, - MEM_REGION_DATA, - MEM_REGION_PT, - MEM_REGION_TEST_DATA, - NR_MEM_REGIONS, -}; - -struct kvm_vm { - int mode; - unsigned long type; - int kvm_fd; - int fd; - unsigned int pgtable_levels; - unsigned int page_size; - unsigned int page_shift; - unsigned int pa_bits; - unsigned int va_bits; - uint64_t max_gfn; - struct list_head vcpus; - struct userspace_mem_regions regions; - struct sparsebit *vpages_valid; - struct sparsebit *vpages_mapped; - bool has_irqchip; - bool pgd_created; - vm_paddr_t ucall_mmio_addr; - vm_paddr_t pgd; - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; - vm_vaddr_t handlers; - uint32_t dirty_ring_size; - - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; - - /* - * KVM region slots. These are the default memslots used by page - * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] - * memslot. - */ - uint32_t memslots[NR_MEM_REGIONS]; -}; - - -#define kvm_for_each_vcpu(vm, i, vcpu) \ - for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ - if (!((vcpu) = vm->vcpus[i])) \ - continue; \ - else - -struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot); - -static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, - enum kvm_mem_region_type type) -{ - assert(type < NR_MEM_REGIONS); - return memslot2region(vm, vm->memslots[type]); -} - -/* Minimum allocated guest virtual and physical addresses */ -#define KVM_UTIL_MIN_VADDR 0x2000 -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - -#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 -#define DEFAULT_STACK_PGS 5 - -enum vm_guest_mode { - VM_MODE_P52V48_4K, - VM_MODE_P52V48_64K, - VM_MODE_P48V48_4K, - VM_MODE_P48V48_16K, - VM_MODE_P48V48_64K, - VM_MODE_P40V48_4K, - VM_MODE_P40V48_16K, - VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ - VM_MODE_P47V64_4K, - VM_MODE_P44V64_4K, - VM_MODE_P36V48_4K, - VM_MODE_P36V48_16K, - VM_MODE_P36V48_64K, - VM_MODE_P36V47_16K, - NUM_VM_MODES, -}; - -#if defined(__aarch64__) - -extern enum vm_guest_mode vm_mode_default; - -#define VM_MODE_DEFAULT vm_mode_default -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__x86_64__) - -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__s390x__) - -#define VM_MODE_DEFAULT VM_MODE_P44V64_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 16) - -#elif defined(__riscv) - -#if __riscv_xlen == 32 -#error "RISC-V 32-bit kvm selftests not supported" -#endif - -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#endif - -#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) -#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) - -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; -extern const struct vm_guest_mode_params vm_guest_mode_params[]; - -int open_path_or_exit(const char *path, int flags); -int open_kvm_dev_path_or_exit(void); - -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); - -unsigned int kvm_check_cap(long cap); - -static inline bool kvm_has_cap(long cap) -{ - return kvm_check_cap(cap); -} - -#define __KVM_SYSCALL_ERROR(_name, _ret) \ - "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) - -#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) -#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) - -#define kvm_do_ioctl(fd, cmd, arg) \ -({ \ - kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ - ioctl(fd, cmd, arg); \ -}) - -#define __kvm_ioctl(kvm_fd, cmd, arg) \ - kvm_do_ioctl(kvm_fd, cmd, arg) - - -#define _kvm_ioctl(kvm_fd, cmd, name, arg) \ -({ \ - int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ - \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ -}) - -#define kvm_ioctl(kvm_fd, cmd, arg) \ - _kvm_ioctl(kvm_fd, cmd, #cmd, arg) - -static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } - -#define __vm_ioctl(vm, cmd, arg) \ -({ \ - static_assert_is_vm(vm); \ - kvm_do_ioctl((vm)->fd, cmd, arg); \ -}) - -#define _vm_ioctl(vm, cmd, name, arg) \ -({ \ - int ret = __vm_ioctl(vm, cmd, arg); \ - \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ -}) - -#define vm_ioctl(vm, cmd, arg) \ - _vm_ioctl(vm, cmd, #cmd, arg) - - -static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } - -#define __vcpu_ioctl(vcpu, cmd, arg) \ -({ \ - static_assert_is_vcpu(vcpu); \ - kvm_do_ioctl((vcpu)->fd, cmd, arg); \ -}) - -#define _vcpu_ioctl(vcpu, cmd, name, arg) \ -({ \ - int ret = __vcpu_ioctl(vcpu, cmd, arg); \ - \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ -}) - -#define vcpu_ioctl(vcpu, cmd, arg) \ - _vcpu_ioctl(vcpu, cmd, #cmd, arg) - -/* - * Looks up and returns the value corresponding to the capability - * (KVM_CAP_*) given by cap. - */ -static inline int vm_check_cap(struct kvm_vm *vm, long cap) -{ - int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); - - TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); - return ret; -} - -static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); -} -static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); -} - -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); -const char *vm_guest_mode_string(uint32_t i); - -void kvm_vm_free(struct kvm_vm *vmp); -void kvm_vm_restart(struct kvm_vm *vmp); -void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); -void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); -int kvm_memfd_alloc(size_t size, bool hugepages); - -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) -{ - struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; - - vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); -} - -static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages) -{ - struct kvm_clear_dirty_log args = { - .dirty_bitmap = log, - .slot = slot, - .first_page = first_page, - .num_pages = num_pages - }; - - vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); -} - -static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) -{ - return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); -} - -static inline int vm_get_stats_fd(struct kvm_vm *vm) -{ - int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); - return fd; -} - -static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) -{ - ssize_t ret; - - ret = pread(stats_fd, header, sizeof(*header), 0); - TEST_ASSERT(ret == sizeof(*header), - "Failed to read '%lu' header bytes, ret = '%ld'", - sizeof(*header), ret); -} - -struct kvm_stats_desc *read_stats_descriptors(int stats_fd, - struct kvm_stats_header *header); - -static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) -{ - /* - * The base size of the descriptor is defined by KVM's ABI, but the - * size of the name field is variable, as far as KVM's ABI is - * concerned. For a given instance of KVM, the name field is the same - * size for all stats and is provided in the overall stats header. - */ - return sizeof(struct kvm_stats_desc) + header->name_size; -} - -static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, - int index, - struct kvm_stats_header *header) -{ - /* - * Note, size_desc includes the size of the name field, which is - * variable. i.e. this is NOT equivalent to &stats_desc[i]. - */ - return (void *)stats + index * get_stats_descriptor_size(header); -} - -void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, - size_t max_elements); - -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); - -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) -{ - uint64_t data; - - __vm_get_stat(vm, stat_name, &data, 1); - return data; -} - -void vm_create_irqchip(struct kvm_vm *vm); - -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); - -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); -void vm_populate_vaddr_bitmap(struct kvm_vm *vm); -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); - -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages); -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); - -void vcpu_run(struct kvm_vcpu *vcpu); -int _vcpu_run(struct kvm_vcpu *vcpu); - -static inline int __vcpu_run(struct kvm_vcpu *vcpu) -{ - return __vcpu_ioctl(vcpu, KVM_RUN, NULL); -} - -void vcpu_run_complete_io(struct kvm_vcpu *vcpu); -struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); - -static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, - uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); -} - -static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *debug) -{ - vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); -} - -static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); -} -static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); -} - -static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - vcpu_ioctl(vcpu, KVM_GET_REGS, regs); -} - -static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - vcpu_ioctl(vcpu, KVM_SET_REGS, regs); -} -static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); - -} -static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); -} -static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); -} -static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); -} -static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); -} - -static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; - - return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); -} -static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; - - return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); -} -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; - - vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); -} -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; - - vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); -} - -#ifdef __KVM_HAVE_VCPU_EVENTS -static inline void vcpu_events_get(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) -{ - vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); -} -static inline void vcpu_events_set(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) -{ - vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); -} -#endif -#ifdef __x86_64__ -static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); -} -static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); -} - -static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); -} -#endif -static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) -{ - int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); - return fd; -} - -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); - -static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) -{ - int ret = __kvm_has_device_attr(dev_fd, group, attr); - - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); -} - -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); - -static inline void kvm_device_attr_get(int dev_fd, uint32_t group, - uint64_t attr, void *val) -{ - int ret = __kvm_device_attr_get(dev_fd, group, attr, val); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); -} - -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); - -static inline void kvm_device_attr_set(int dev_fd, uint32_t group, - uint64_t attr, void *val) -{ - int ret = __kvm_device_attr_set(dev_fd, group, attr, val); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); -} - -static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) -{ - return __kvm_has_device_attr(vcpu->fd, group, attr); -} - -static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) -{ - kvm_has_device_attr(vcpu->fd, group, attr); -} - -static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - return __kvm_device_attr_get(vcpu->fd, group, attr, val); -} - -static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - kvm_device_attr_get(vcpu->fd, group, attr, val); -} - -static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - return __kvm_device_attr_set(vcpu->fd, group, attr, val); -} - -static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - kvm_device_attr_set(vcpu->fd, group, attr, val); -} - -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); -int __kvm_create_device(struct kvm_vm *vm, uint64_t type); - -static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) -{ - int fd = __kvm_create_device(vm, type); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); - return fd; -} - -void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); - -/* - * VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first @num input parameters for the function at @vcpu's entry point, - * per the C calling convention of the architecture, to the values given as - * variable args. Each of the variable args is expected to be of type uint64_t. - * The maximum @num can be is specific to the architecture. - */ -void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); - -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); - -#define KVM_MAX_IRQ_ROUTES 4096 - -struct kvm_irq_routing *kvm_gsi_routing_create(void); -void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin); -int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); -void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); - -const char *exit_reason_str(unsigned int exit_reason); - -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot); -vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot); -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); - -/* - * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also - * loads the test binary into guest memory and creates an IRQ chip (x86 only). - * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to - * calculate the amount of memory needed for per-vCPU data, e.g. stacks. - */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode); -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages); - -static inline struct kvm_vm *vm_create_barebones(void) -{ - return ____vm_create(VM_MODE_DEFAULT); -} - -static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) -{ - return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); -} - -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, - uint64_t extra_mem_pages, - void *guest_code, struct kvm_vcpu *vcpus[]); - -static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, - void *guest_code, - struct kvm_vcpu *vcpus[]) -{ - return __vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, 0, - guest_code, vcpus); -} - -/* - * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages - * additional pages of guest memory. Returns the VM and vCPU (via out param). - */ -struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code); - -static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - void *guest_code) -{ - return __vm_create_with_one_vcpu(vcpu, 0, guest_code); -} - -struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); - -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); -void kvm_print_vcpu_pinning_help(void); -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], - int nr_vcpus); - -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); -unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); -unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); -static inline unsigned int -vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) -{ - unsigned int n; - n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); -#ifdef __s390x__ - /* s390 requires 1M aligned guest sizes */ - n = (n + 255) & ~255; -#endif - return n; -} - -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end); - -#define sync_global_to_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(_p, &(g), sizeof(g)); \ -}) - -#define sync_global_from_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(&(g), _p, sizeof(g)); \ -}) - -/* - * Write a global value, but only in the VM's (guest's) domain. Primarily used - * for "globals" that hold per-VM values (VMs always duplicate code and global - * data into their own region of physical memory), but can be used anytime it's - * undesirable to change the host's copy of the global. - */ -#define write_guest_global(vm, g, val) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - typeof(g) _val = val; \ - \ - memcpy(_p, &(_val), sizeof(g)); \ -}) - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); - -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent); - -static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent) -{ - vcpu_arch_dump(stream, vcpu, indent); -} - -/* - * Adds a vCPU with reasonable defaults (e.g. a stack) - * - * Input Args: - * vm - Virtual Machine - * vcpu_id - The id of the VCPU to add to the VM. - * guest_code - The vCPU's entry point - */ -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code); - -static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) -{ - return vm_arch_vcpu_add(vm, vcpu_id, guest_code); -} - -/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); - -static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) -{ - return vm_arch_vcpu_recreate(vm, vcpu_id); -} - -void vcpu_arch_free(struct kvm_vcpu *vcpu); - -void virt_arch_pgd_alloc(struct kvm_vm *vm); - -static inline void virt_pgd_alloc(struct kvm_vm *vm) -{ - virt_arch_pgd_alloc(vm); -} - -/* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. - */ -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); - -static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) -{ - virt_arch_pg_map(vm, vaddr, paddr); -} - - -/* - * Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gva - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Returns the VM physical address of the translated VM virtual - * address given by @gva. - */ -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); - -static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - return addr_arch_gva2gpa(vm, gva); -} - -/* - * Virtual Translation Tables Dump - * - * Input Args: - * stream - Output FILE stream - * vm - Virtual Machine - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps to the FILE stream given by @stream, the contents of all the - * virtual translation tables for the VM given by @vm. - */ -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - virt_arch_dump(stream, vm, indent); -} - - -static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) -{ - return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); -} - -/* - * Arch hook that is invoked via a constructor, i.e. before exeucting main(), - * to allow for arch-specific setup that is common to all tests, e.g. computing - * the default guest "mode". - */ -void kvm_selftest_arch_init(void); - -void kvm_arch_vm_post_create(struct kvm_vm *vm); - -#endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h new file mode 100644 index 000000000000..ec787b97cf18 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_TYPES_H +#define SELFTEST_KVM_UTIL_TYPES_H + +/* + * Provide a version of static_assert() that is guaranteed to have an optional + * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE + * implies _ISOC11_SOURCE, and if _ISOC11_SOURCE is defined, glibc #undefs and + * #defines static_assert() as a direct alias to _Static_assert() (see + * usr/include/assert.h). Define a custom macro instead of redefining + * static_assert() to avoid creating non-deterministic behavior that is + * dependent on include order. + */ +#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +#endif /* SELFTEST_KVM_UTIL_TYPES_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h new file mode 100644 index 000000000000..2ed106b32c81 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch Constant Timer specific interface + */ +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include "processor.h" + +/* LoongArch timer frequency is constant 100MHZ */ +#define TIMER_FREQ (100UL << 20) +#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000) +#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000) +#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ) + +static inline unsigned long timer_get_cycles(void) +{ + unsigned long val = 0; + + __asm__ __volatile__( + "rdtime.d %0, $zero\n\t" + : "=r"(val) + : + ); + + return val; +} + +static inline unsigned long timer_get_cfg(void) +{ + return csr_read(LOONGARCH_CSR_TCFG); +} + +static inline unsigned long timer_get_val(void) +{ + return csr_read(LOONGARCH_CSR_TVAL); +} + +static inline void disable_timer(void) +{ + csr_write(0, LOONGARCH_CSR_TCFG); +} + +static inline void timer_irq_enable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val |= ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_irq_disable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val &= ~ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_set_next_cmp_ms(unsigned int msec, bool period) +{ + unsigned long val; + + val = msec_to_cycles(msec) & CSR_TCFG_VAL; + val |= CSR_TCFG_EN; + if (period) + val |= CSR_TCFG_PERIOD; + csr_write(val, LOONGARCH_CSR_TCFG); +} + +static inline void __delay(uint64_t cycles) +{ + uint64_t start = timer_get_cycles(); + + while ((timer_get_cycles() - start) < cycles) + cpu_relax(); +} + +static inline void udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h new file mode 100644 index 000000000000..76840ddda57d --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#ifndef __ASSEMBLER__ +#include "ucall_common.h" + +#else +/* general registers */ +#define zero $r0 +#define ra $r1 +#define tp $r2 +#define sp $r3 +#define a0 $r4 +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 +#define s0 $r23 +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 +#endif + +/* + * LoongArch page table entry definition + * Original header file arch/loongarch/include/asm/loongarch.h + */ +#define _PAGE_VALID_SHIFT 0 +#define _PAGE_DIRTY_SHIFT 1 +#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */ +#define PLV_KERN 0 +#define PLV_USER 3 +#define PLV_MASK 0x3 +#define _CACHE_SHIFT 4 /* 4~5, two bits */ +#define _PAGE_PRESENT_SHIFT 7 +#define _PAGE_WRITE_SHIFT 8 + +#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT) +#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT) +#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT) +#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT) +#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT) +#define __READABLE (_PAGE_VALID) +#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE) +/* Coherent Cached */ +#define _CACHE_CC BIT_ULL(_CACHE_SHIFT) +#define PS_4K 0x0000000c +#define PS_16K 0x0000000e +#define PS_64K 0x00000010 +#define PS_DEFAULT_SIZE PS_16K + +/* LoongArch Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT) +#define CSR_CRMD_IE_SHIFT 2 +#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT) +#define CSR_CRMD_PLV_SHIFT 0 +#define CSR_CRMD_PLV_WIDTH 2 +#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT) +#define PLV_MASK 0x3 +#define LOONGARCH_CSR_PRMD 0x1 +#define LOONGARCH_CSR_EUEN 0x2 +#define LOONGARCH_CSR_ECFG 0x4 +#define ECFGB_TIMER 11 +#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER)) +#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define CSR_ESTAT_EXC_SHIFT 16 +#define CSR_ESTAT_EXC_WIDTH 6 +#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT) +#define EXCCODE_INT 0 /* Interrupt */ +#define INT_TI 11 /* Timer interrupt*/ +#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ +#define LOONGARCH_CSR_EENTRY 0xc +#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */ +#define CSR_TLBIDX_PS_SHIFT 24 +#define CSR_TLBIDX_PS_WIDTH 6 +#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT) +#define CSR_TLBIDX_SIZEM 0x3f000000 +#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT +#define LOONGARCH_CSR_ASID 0x18 /* ASID */ +#define LOONGARCH_CSR_PGDL 0x19 +#define LOONGARCH_CSR_PGDH 0x1a +/* Page table base */ +#define LOONGARCH_CSR_PGD 0x1b +#define LOONGARCH_CSR_PWCTL0 0x1c +#define LOONGARCH_CSR_PWCTL1 0x1d +#define LOONGARCH_CSR_STLBPGSIZE 0x1e +#define LOONGARCH_CSR_CPUID 0x20 +#define LOONGARCH_CSR_KS0 0x30 +#define LOONGARCH_CSR_KS1 0x31 +#define LOONGARCH_CSR_TMID 0x40 +#define LOONGARCH_CSR_TCFG 0x41 +#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2)) +#define CSR_TCFG_PERIOD_SHIFT 1 +#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (0x1UL) +#define LOONGARCH_CSR_TVAL 0x42 +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) +/* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 +#define LOONGARCH_CSR_TLBRSAVE 0x8b +#define LOONGARCH_CSR_TLBREHI 0x8e +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__( \ + "csrrd %[val], %[reg]\n\t" \ + : [val] "=r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define csr_write(v, csr) \ +({ \ + register unsigned long __v = v; \ + __asm__ __volatile__ ( \ + "csrwr %[val], %[reg]\n\t" \ + : [val] "+r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define EXREGS_GPRS (32) + +#ifndef __ASSEMBLER__ +void handle_tlb_refill(void); +void handle_exception(void); + +struct ex_regs { + unsigned long regs[EXREGS_GPRS]; + unsigned long pc; + unsigned long estat; + unsigned long badv; + unsigned long prmd; +}; + +#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc) +#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat) +#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv) +#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd) +#define EXREGS_SIZE sizeof(struct ex_regs) + +#define VECTOR_NUM 64 + +typedef void(*handler_fn)(struct ex_regs *); + +struct handlers { + handler_fn exception_handlers[VECTOR_NUM]; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler); + +static inline void cpu_relax(void) +{ + asm volatile("nop" ::: "memory"); +} + +static inline void local_irq_enable(void) +{ + unsigned int flags = CSR_CRMD_IE; + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void local_irq_disable(void) +{ + unsigned int flags = 0; + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} +#else +#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) +#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) +#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8) +#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8) +#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8) +#endif + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h new file mode 100644 index 000000000000..4ec801f37f00 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h new file mode 100644 index 000000000000..d32ff5d8ffd0 --- /dev/null +++ b/tools/testing/selftests/kvm/include/lru_gen_util.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output. + * + * Copyright (C) 2025, Google LLC. + */ +#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H +#define SELFTEST_KVM_LRU_GEN_UTIL_H + +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> + +#include "test_util.h" + +#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */ +#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */ + +#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen" +#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled" +#define LRU_GEN_ENABLED 1 +#define LRU_GEN_MM_WALK 2 + +struct generation_stats { + int gen; + long age_ms; + long nr_anon; + long nr_file; +}; + +struct node_stats { + int node; + int nr_gens; /* Number of populated gens entries. */ + struct generation_stats gens[MAX_NR_GENS]; +}; + +struct memcg_stats { + unsigned long memcg_id; + int nr_nodes; /* Number of populated nodes entries. */ + struct node_stats nodes[MAX_NR_NODES]; +}; + +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg); +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats); +long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats); +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg); +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long total_pages); +bool lru_gen_usable(void); + +#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index ce4e603050ea..9071eb6dea60 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -62,7 +62,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, void memstress_destroy_vm(struct kvm_vm *vm); void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); -void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); void memstress_set_random_access(struct kvm_vm *vm, bool random_access); void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h index b020547403fd..29572a6d789c 100644 --- a/tools/testing/selftests/kvm/include/numaif.h +++ b/tools/testing/selftests/kvm/include/numaif.h @@ -1,55 +1,83 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/numaif.h - * - * Copyright (C) 2020, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Header file that provides access to NUMA API functions not explicitly - * exported to user space. - */ +/* Copyright (C) 2020, Google LLC. */ #ifndef SELFTEST_KVM_NUMAIF_H #define SELFTEST_KVM_NUMAIF_H -#define __NR_get_mempolicy 239 -#define __NR_migrate_pages 256 +#include <dirent.h> -/* System calls */ -long get_mempolicy(int *policy, const unsigned long *nmask, - unsigned long maxnode, void *addr, int flags) +#include <linux/mempolicy.h> + +#include "kvm_syscalls.h" + +KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask, + unsigned long, maxnode, void *, addr, int, flags); + +KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask, + unsigned long, maxnode); + +KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start, + unsigned long, len, unsigned long, home_node, + unsigned long, flags); + +KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode, + const unsigned long *, frommask, const unsigned long *, tomask); + +KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages, + const int *, nodes, int *, status, int, flags); + +KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode, + const unsigned long *, nodemask, unsigned long, maxnode, + unsigned int, flags); + +static inline int get_max_numa_node(void) { - return syscall(__NR_get_mempolicy, policy, nmask, - maxnode, addr, flags); + struct dirent *de; + int max_node = 0; + DIR *d; + + /* + * Assume there's a single node if the kernel doesn't support NUMA, + * or if no nodes are found. + */ + d = opendir("/sys/devices/system/node"); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + int node_id; + char *endptr; + + if (strncmp(de->d_name, "node", 4) != 0) + continue; + + node_id = strtol(de->d_name + 4, &endptr, 10); + if (*endptr != '\0') + continue; + + if (node_id > max_node) + max_node = node_id; + } + closedir(d); + + return max_node; } -long migrate_pages(int pid, unsigned long maxnode, - const unsigned long *frommask, - const unsigned long *tomask) +static bool is_numa_available(void) { - return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask); + /* + * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall + * fails with ENOSYS, then the kernel was built without NUMA support. + * if the syscall fails with EPERM, then the process/user lacks the + * necessary capabilities (CAP_SYS_NICE). + */ + return !get_mempolicy(NULL, NULL, 0, NULL, 0) || + (errno != ENOSYS && errno != EPERM); } -/* Policies */ -#define MPOL_DEFAULT 0 -#define MPOL_PREFERRED 1 -#define MPOL_BIND 2 -#define MPOL_INTERLEAVE 3 - -#define MPOL_MAX MPOL_INTERLEAVE - -/* Flags for get_mem_policy */ -#define MPOL_F_NODE (1<<0) /* return next il node or node of address */ - /* Warning: MPOL_F_NODE is unsupported and - * subject to change. Don't use. - */ -#define MPOL_F_ADDR (1<<1) /* look up vma using address */ -#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */ - -/* Flags for mbind */ -#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ +static inline bool is_multi_numa_node_system(void) +{ + return is_numa_available() && get_max_numa_node() >= 1; +} #endif /* SELFTEST_KVM_NUMAIF_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h new file mode 100644 index 000000000000..225d81dad064 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RISC-V Arch Timer(sstc) specific interface + * + * Copyright (c) 2024 Intel Corporation + */ + +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include <asm/csr.h> +#include <asm/vdso/processor.h> + +static unsigned long timer_freq; + +#define msec_to_cycles(msec) \ + ((timer_freq) * (uint64_t)(msec) / 1000) + +#define usec_to_cycles(usec) \ + ((timer_freq) * (uint64_t)(usec) / 1000000) + +#define cycles_to_usec(cycles) \ + ((uint64_t)(cycles) * 1000000 / (timer_freq)) + +static inline uint64_t timer_get_cycles(void) +{ + return csr_read(CSR_TIME); +} + +static inline void timer_set_cmp(uint64_t cval) +{ + csr_write(CSR_STIMECMP, cval); +} + +static inline uint64_t timer_get_cmp(void) +{ + return csr_read(CSR_STIMECMP); +} + +static inline void timer_irq_enable(void) +{ + csr_set(CSR_SIE, IE_TIE); +} + +static inline void timer_irq_disable(void) +{ + csr_clear(CSR_SIE, IE_TIE); +} + +static inline void timer_set_next_cmp_ms(uint32_t msec) +{ + uint64_t now_ct = timer_get_cycles(); + uint64_t next_ct = now_ct + msec_to_cycles(msec); + + timer_set_cmp(next_ct); +} + +static inline void __delay(uint64_t cycles) +{ + uint64_t start = timer_get_cycles(); + + while ((timer_get_cycles() - start) < cycles) + cpu_relax(); +} + +static inline void udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} + +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index d00d213c3805..e58282488beb 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -7,13 +7,28 @@ #ifndef SELFTEST_KVM_PROCESSOR_H #define SELFTEST_KVM_PROCESSOR_H -#include "kvm_util.h" #include <linux/stringify.h> +#include <asm/csr.h> +#include <asm/vdso/processor.h> +#include "kvm_util.h" + +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_FUNCT3 0x7000 +#define INSN_SHIFT_FUNCT3 12 -static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, - uint64_t size) +#define INSN_CSR_MASK 0xfff00000 +#define INSN_CSR_SHIFT 20 + +#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3) +#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT) + +static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, + uint64_t idx, uint64_t size) { - return KVM_REG_RISCV | type | idx | size; + return KVM_REG_RISCV | type | subtype | idx | size; } #if __riscv_xlen == 64 @@ -22,21 +37,96 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, #define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32 #endif -#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, \ - KVM_REG_RISCV_CONFIG_REG(name), \ - KVM_REG_SIZE_ULONG) +#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, 0, \ + KVM_REG_RISCV_CONFIG_REG(name), \ + KVM_REG_SIZE_ULONG) + +#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, 0, \ + KVM_REG_RISCV_CORE_REG(name), \ + KVM_REG_SIZE_ULONG) + +#define RISCV_GENERAL_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \ + KVM_REG_RISCV_CSR_GENERAL, \ + KVM_REG_RISCV_CSR_REG(name), \ + KVM_REG_SIZE_ULONG) + +#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, 0, \ + KVM_REG_RISCV_TIMER_REG(name), \ + KVM_REG_SIZE_U64) + +#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \ + KVM_REG_RISCV_ISA_SINGLE, \ + idx, KVM_REG_SIZE_ULONG) + +#define RISCV_SBI_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_SBI_EXT, \ + KVM_REG_RISCV_SBI_SINGLE, \ + idx, KVM_REG_SIZE_ULONG) + +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext); + +static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext) +{ + return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext)); +} + +static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) +{ + return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); +} + +struct pt_regs { + unsigned long epc; + unsigned long ra; + unsigned long sp; + unsigned long gp; + unsigned long tp; + unsigned long t0; + unsigned long t1; + unsigned long t2; + unsigned long s0; + unsigned long s1; + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + unsigned long s2; + unsigned long s3; + unsigned long s4; + unsigned long s5; + unsigned long s6; + unsigned long s7; + unsigned long s8; + unsigned long s9; + unsigned long s10; + unsigned long s11; + unsigned long t3; + unsigned long t4; + unsigned long t5; + unsigned long t6; + /* Supervisor/Machine CSRs */ + unsigned long status; + unsigned long badaddr; + unsigned long cause; + /* a0 value before the syscall */ + unsigned long orig_a0; +}; + +#define NR_VECTORS 2 +#define NR_EXCEPTIONS 32 +#define EC_MASK (NR_EXCEPTIONS - 1) -#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, \ - KVM_REG_RISCV_CORE_REG(name), \ - KVM_REG_SIZE_ULONG) +typedef void(*exception_handler_fn)(struct pt_regs *); -#define RISCV_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \ - KVM_REG_RISCV_CSR_REG(name), \ - KVM_REG_SIZE_ULONG) +void vm_init_vector_tables(struct kvm_vm *vm); +void vcpu_init_vector_tables(struct kvm_vcpu *vcpu); -#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, \ - KVM_REG_RISCV_TIMER_REG(name), \ - KVM_REG_SIZE_U64) +void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler); + +void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler); /* L3 index Bit[47:39] */ #define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL @@ -92,28 +182,14 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, #define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE #define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT -#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) -#define SATP_MODE_39 _AC(0x8000000000000000, UL) -#define SATP_MODE_48 _AC(0x9000000000000000, UL) -#define SATP_ASID_BITS 16 -#define SATP_ASID_SHIFT 44 -#define SATP_ASID_MASK _AC(0xFFFF, UL) - -#define SBI_EXT_EXPERIMENTAL_START 0x08000000 -#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF - -#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END -#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 -#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 - -struct sbiret { - long error; - long value; -}; +static inline void local_irq_enable(void) +{ + csr_set(CSR_SSTATUS, SR_SIE); +} -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5); +static inline void local_irq_disable(void) +{ + csr_clear(CSR_SSTATUS, SR_SIE); +} #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h new file mode 100644 index 000000000000..046b432ae896 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/sbi.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RISC-V SBI specific definitions + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef SELFTEST_KVM_SBI_H +#define SELFTEST_KVM_SBI_H + +/* SBI spec version fields */ +#define SBI_SPEC_VERSION_DEFAULT 0x1 +#define SBI_SPEC_VERSION_MAJOR_SHIFT 24 +#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f +#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff + +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 +#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 + +#define SBI_EXT_EXPERIMENTAL_START 0x08000000 +#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF + +#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END +#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 +#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 + +enum sbi_ext_id { + SBI_EXT_BASE = 0x10, + SBI_EXT_STA = 0x535441, + SBI_EXT_PMU = 0x504D55, +}; + +enum sbi_ext_base_fid { + SBI_EXT_BASE_GET_SPEC_VERSION = 0, + SBI_EXT_BASE_GET_IMP_ID, + SBI_EXT_BASE_GET_IMP_VERSION, + SBI_EXT_BASE_PROBE_EXT = 3, +}; +enum sbi_ext_pmu_fid { + SBI_EXT_PMU_NUM_COUNTERS = 0, + SBI_EXT_PMU_COUNTER_GET_INFO, + SBI_EXT_PMU_COUNTER_CFG_MATCH, + SBI_EXT_PMU_COUNTER_START, + SBI_EXT_PMU_COUNTER_STOP, + SBI_EXT_PMU_COUNTER_FW_READ, + SBI_EXT_PMU_COUNTER_FW_READ_HI, + SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, +}; + +union sbi_pmu_ctr_info { + unsigned long value; + struct { + unsigned long csr:12; + unsigned long width:6; +#if __riscv_xlen == 32 + unsigned long reserved:13; +#else + unsigned long reserved:45; +#endif + unsigned long type:1; + }; +}; + +struct riscv_pmu_snapshot_data { + u64 ctr_overflow_mask; + u64 ctr_values[64]; + u64 reserved[447]; +}; + +struct sbiret { + long error; + long value; +}; + +/** General pmu event codes specified in SBI PMU extension */ +enum sbi_pmu_hw_generic_events_t { + SBI_PMU_HW_NO_EVENT = 0, + SBI_PMU_HW_CPU_CYCLES = 1, + SBI_PMU_HW_INSTRUCTIONS = 2, + SBI_PMU_HW_CACHE_REFERENCES = 3, + SBI_PMU_HW_CACHE_MISSES = 4, + SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5, + SBI_PMU_HW_BRANCH_MISSES = 6, + SBI_PMU_HW_BUS_CYCLES = 7, + SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8, + SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9, + SBI_PMU_HW_REF_CPU_CYCLES = 10, + + SBI_PMU_HW_GENERAL_MAX, +}; + +/* SBI PMU counter types */ +enum sbi_pmu_ctr_type { + SBI_PMU_CTR_TYPE_HW = 0x0, + SBI_PMU_CTR_TYPE_FW, +}; + +/* Flags defined for config matching function */ +#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1) +#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2) +#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3) +#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4) +#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5) +#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6) +#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7) + +/* Flags defined for counter start function */ +#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0) +#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1) + +/* Flags defined for counter stop function */ +#define SBI_PMU_STOP_FLAG_RESET BIT(0) +#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1) + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5); + +bool guest_sbi_probe_extension(int extid, long *out_val); + +/* Make SBI version */ +static inline unsigned long sbi_mk_version(unsigned long major, + unsigned long minor) +{ + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT) + | (minor & SBI_SPEC_VERSION_MINOR_MASK); +} + +unsigned long get_host_sbi_spec_version(void); + +#endif /* SELFTEST_KVM_SBI_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h new file mode 100644 index 000000000000..a695ae36f3e0 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "processor.h" +#include "sbi.h" + +#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, + KVM_RISCV_SELFTESTS_SBI_UCALL, + uc, 0, 0, 0, 0, 0); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h new file mode 100644 index 000000000000..1bf275631cc6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/debug_print.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definition for kernel virtual machines on s390x + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ + +#ifndef SELFTEST_KVM_DEBUG_PRINT_H +#define SELFTEST_KVM_DEBUG_PRINT_H + +#include "asm/ptrace.h" +#include "kvm_util.h" +#include "sie.h" + +static inline void print_hex_bytes(const char *name, u64 addr, size_t len) +{ + u64 pos; + + pr_debug("%s (%p)\n", name, (void *)addr); + pr_debug(" 0/0x00---------|"); + if (len > 8) + pr_debug(" 8/0x08---------|"); + if (len > 16) + pr_debug(" 16/0x10--------|"); + if (len > 24) + pr_debug(" 24/0x18--------|"); + for (pos = 0; pos < len; pos += 8) { + if ((pos % 32) == 0) + pr_debug("\n %3lu 0x%.3lx ", pos, pos); + pr_debug(" %16lx", *((u64 *)(addr + pos))); + } + pr_debug("\n"); +} + +static inline void print_hex(const char *name, u64 addr) +{ + print_hex_bytes(name, addr, 512); +} + +static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", + run->flags, + run->psw_mask, run->psw_addr, + run->exit_reason, exit_reason_str(run->exit_reason)); + pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", + sie_block->psw_mask, sie_block->psw_addr); +} + +static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + print_hex_bytes("run", (u64)run, 0x150); + print_hex("sie_block", (u64)sie_block); + print_psw(run, sie_block); +} + +static inline void print_regs(struct kvm_run *run) +{ + struct kvm_sync_regs *sync_regs = &run->s.regs; + + print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); + print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); + print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS); +} + +#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h index b0ed71302722..b0ed71302722 100644 --- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h new file mode 100644 index 000000000000..00a1ced6538b --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/facility.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Get the facility bits with the STFLE instruction + */ + +#ifndef SELFTEST_KVM_FACILITY_H +#define SELFTEST_KVM_FACILITY_H + +#include <linux/bitops.h> + +/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ +#define NB_STFL_DOUBLEWORDS 32 + +extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern bool stfle_flag; + +static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +{ + register unsigned long r0 asm("0") = nb_doublewords - 1; + + asm volatile(" .insn s,0xb2b00000,0(%1)\n" + : "+d" (r0) + : "a" (fac) + : "memory", "cc"); +} + +static inline void setup_facilities(void) +{ + stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); + stfle_flag = true; +} + +static inline bool test_facility(int nr) +{ + if (!stfle_flag) + setup_facilities(); + return test_bit_inv(nr, stfl_doublewords); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h index 255c9b990f4c..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390/processor.h @@ -21,10 +21,21 @@ #define PAGE_PROTECT 0x200 /* HW read-only bit */ #define PAGE_NOEXEC 0x100 /* HW no-execute bit */ +/* Page size definitions */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + /* Is there a portable way to do this? */ static inline void cpu_relax(void) { barrier(); } +/* Get the instruction length */ +static inline int insn_length(unsigned char code) +{ + return ((((int)code + 64) >> 7) + 1) << 1; +} + #endif diff --git a/tools/testing/selftests/kvm/include/s390/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h new file mode 100644 index 000000000000..160acd4a1db9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/sie.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definition for kernel virtual machines on s390. + * + * Adapted copy of struct definition kvm_s390_sie_block from + * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. + * + * Copyright IBM Corp. 2008, 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + * Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef SELFTEST_KVM_SIE_H +#define SELFTEST_KVM_SIE_H + +#include <linux/types.h> + +struct kvm_s390_sie_block { +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + __u32 cpuflags; /* 0x0000 */ + __u32: 1; /* 0x0004 */ + __u32 prefix : 18; + __u32: 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE BIT(0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE BIT(0) +#define PROG_REQUEST BIT(1) + __u32 prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + __u64 psw_mask; /* 0x0090 */ + __u64 psw_addr; /* 0x0098 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h new file mode 100644 index 000000000000..8035a872a351 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390/ucall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h index 12a9a4b9cead..bc760761e1a3 100644 --- a/tools/testing/selftests/kvm/include/sparsebit.h +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -30,26 +30,26 @@ typedef uint64_t sparsebit_num_t; struct sparsebit *sparsebit_alloc(void); void sparsebit_free(struct sparsebit **sbitp); -void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src); +void sparsebit_copy(struct sparsebit *dstp, const struct sparsebit *src); -bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx); -bool sparsebit_is_set_num(struct sparsebit *sbit, +bool sparsebit_is_set(const struct sparsebit *sbit, sparsebit_idx_t idx); +bool sparsebit_is_set_num(const struct sparsebit *sbit, sparsebit_idx_t idx, sparsebit_num_t num); -bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx); -bool sparsebit_is_clear_num(struct sparsebit *sbit, +bool sparsebit_is_clear(const struct sparsebit *sbit, sparsebit_idx_t idx); +bool sparsebit_is_clear_num(const struct sparsebit *sbit, sparsebit_idx_t idx, sparsebit_num_t num); -sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit); -bool sparsebit_any_set(struct sparsebit *sbit); -bool sparsebit_any_clear(struct sparsebit *sbit); -bool sparsebit_all_set(struct sparsebit *sbit); -bool sparsebit_all_clear(struct sparsebit *sbit); -sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit); -sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit); -sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev); -sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev); -sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit, +sparsebit_num_t sparsebit_num_set(const struct sparsebit *sbit); +bool sparsebit_any_set(const struct sparsebit *sbit); +bool sparsebit_any_clear(const struct sparsebit *sbit); +bool sparsebit_all_set(const struct sparsebit *sbit); +bool sparsebit_all_clear(const struct sparsebit *sbit); +sparsebit_idx_t sparsebit_first_set(const struct sparsebit *sbit); +sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *sbit); +sparsebit_idx_t sparsebit_next_set(const struct sparsebit *sbit, sparsebit_idx_t prev); +sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *sbit, sparsebit_idx_t prev); +sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *sbit, sparsebit_idx_t start, sparsebit_num_t num); -sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit, +sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *sbit, sparsebit_idx_t start, sparsebit_num_t num); void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx); @@ -62,9 +62,29 @@ void sparsebit_clear_num(struct sparsebit *sbitp, sparsebit_idx_t start, sparsebit_num_t num); void sparsebit_clear_all(struct sparsebit *sbitp); -void sparsebit_dump(FILE *stream, struct sparsebit *sbit, +void sparsebit_dump(FILE *stream, const struct sparsebit *sbit, unsigned int indent); -void sparsebit_validate_internal(struct sparsebit *sbit); +void sparsebit_validate_internal(const struct sparsebit *sbit); + +/* + * Iterate over an inclusive ranges within sparsebit @s. In each iteration, + * @range_begin and @range_end will take the beginning and end of the set + * range, which are of type sparsebit_idx_t. + * + * For example, if the range [3, 7] (inclusive) is set, within the + * iteration,@range_begin will take the value 3 and @range_end will take + * the value 7. + * + * Ensure that there is at least one bit set before using this macro with + * sparsebit_any_set(), because sparsebit_first_set() will abort if none + * are set. + */ +#define sparsebit_for_each_set_range(s, range_begin, range_end) \ + for (range_begin = sparsebit_first_set(s), \ + range_end = sparsebit_next_clear(s, range_begin) - 1; \ + range_begin && range_end; \ + range_begin = sparsebit_next_set(s, range_end), \ + range_end = sparsebit_next_clear(s, range_begin) - 1) #ifdef __cplusplus } diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index a6e9f215ce70..b4872ba8ed12 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_TEST_UTIL_H #define SELFTEST_KVM_TEST_UTIL_H +#include <setjmp.h> +#include <signal.h> #include <stdlib.h> #include <stdarg.h> #include <stdbool.h> @@ -20,7 +22,9 @@ #include <sys/mman.h> #include "kselftest.h" -static inline int _no_printf(const char *format, ...) { return 0; } +#define msecs_to_usecs(msec) ((msec) * 1000ULL) + +static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; } #ifdef DEBUG #define pr_debug(...) printf(__VA_ARGS__) @@ -33,7 +37,7 @@ static inline int _no_printf(const char *format, ...) { return 0; } #define pr_info(...) _no_printf(__VA_ARGS__) #endif -void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void __printf(1, 2) print_skip(const char *fmt, ...); #define __TEST_REQUIRE(f, fmt, ...) \ do { \ if (!(f)) \ @@ -46,21 +50,20 @@ ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); int test_seq_read(const char *path, char **bufp, size_t *sizep); -void test_assert(bool exp, const char *exp_str, - const char *file, unsigned int line, const char *fmt, ...) - __attribute__((format(printf, 5, 6))); +void __printf(5, 6) test_assert(bool exp, const char *exp_str, + const char *file, unsigned int line, + const char *fmt, ...); #define TEST_ASSERT(e, fmt, ...) \ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) -#define ASSERT_EQ(a, b) do { \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - TEST_ASSERT(__a == __b, \ - "ASSERT_EQ(%s, %s) failed.\n" \ - "\t%s is %#lx\n" \ - "\t%s is %#lx", \ - #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ +#define TEST_ASSERT_EQ(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__, \ + "%#lx != %#lx (%s != %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b);\ } while (0) #define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do { \ @@ -77,6 +80,23 @@ void test_assert(bool exp, const char *exp_str, __builtin_unreachable(); \ } while (0) +extern sigjmp_buf expect_sigbus_jmpbuf; +void expect_sigbus_handler(int signum); + +#define TEST_EXPECT_SIGBUS(action) \ +do { \ + struct sigaction sa_old, sa_new = { \ + .sa_handler = expect_sigbus_handler, \ + }; \ + \ + sigaction(SIGBUS, &sa_new, &sa_old); \ + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \ + action; \ + TEST_FAIL("'%s' should have triggered SIGBUS", #action); \ + } \ + sigaction(SIGBUS, &sa_old, NULL); \ +} while (0) + size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); @@ -90,9 +110,28 @@ struct guest_random_state { uint32_t seed; }; +extern uint32_t guest_random_seed; +extern struct guest_random_state guest_rng; + struct guest_random_state new_guest_random_state(uint32_t seed); uint32_t guest_random_u32(struct guest_random_state *state); +static inline bool __guest_random_bool(struct guest_random_state *state, + uint8_t percent) +{ + return (guest_random_u32(state) % 100) < percent; +} + +static inline bool guest_random_bool(struct guest_random_state *state) +{ + return __guest_random_bool(state, 50); +} + +static inline uint64_t guest_random_u64(struct guest_random_state *state) +{ + return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state); +} + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, @@ -133,6 +172,7 @@ bool is_backing_src_hugetlb(uint32_t i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void); /* * Whether or not the given source type is shared memory (as opposed to @@ -143,6 +183,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) +{ + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM; +} + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static inline uint64_t align_up(uint64_t x, uint64_t size) { @@ -186,4 +231,11 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str) return num; } +int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args); +__printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...); + +char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1))); + +char *sys_get_cur_clocksource(void); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h new file mode 100644 index 000000000000..9b6edaafe6d4 --- /dev/null +++ b/tools/testing/selftests/kvm/include/timer_test.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * timer test specific header + * + * Copyright (C) 2018, Google LLC + */ + +#ifndef SELFTEST_KVM_TIMER_TEST_H +#define SELFTEST_KVM_TIMER_TEST_H + +#include "kvm_util.h" + +#define NR_VCPUS_DEF 4 +#define NR_TEST_ITERS_DEF 5 +#define TIMER_TEST_PERIOD_MS_DEF 10 +#define TIMER_TEST_ERR_MARGIN_US 100 +#define TIMER_TEST_MIGRATION_FREQ_MS 2 + +/* Timer test cmdline parameters */ +struct test_args { + uint32_t nr_vcpus; + uint32_t nr_iter; + uint32_t timer_period_ms; + uint32_t migration_freq_ms; + uint32_t timer_err_margin_us; + /* Members of struct kvm_arm_counter_offset */ + uint64_t counter_offset; + uint64_t reserved; +}; + +/* Shared variables between host and guest */ +struct test_vcpu_shared_data { + uint32_t nr_iter; + int guest_stage; + uint64_t xcnt; +}; + +extern struct test_args test_args; +extern struct kvm_vcpu *vcpus[]; +extern struct test_vcpu_shared_data vcpu_shared_data[]; + +struct kvm_vm *test_vm_create(void); +void test_vm_cleanup(struct kvm_vm *vm); + +#endif /* SELFTEST_KVM_TIMER_TEST_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 1a6aaef5ccae..d9d6581b8d4f 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -1,27 +1,29 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UCALL_COMMON_H #define SELFTEST_KVM_UCALL_COMMON_H #include "test_util.h" +#include "ucall.h" /* Common ucalls */ enum { UCALL_NONE, UCALL_SYNC, UCALL_ABORT, + UCALL_PRINTF, UCALL_DONE, UCALL_UNHANDLED, }; #define UCALL_MAX_ARGS 7 +#define UCALL_BUFFER_LEN 1024 struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + char buffer[UCALL_BUFFER_LEN]; /* Host virtual address of this struct. */ struct ucall *hva; @@ -32,8 +34,13 @@ void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); +__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...); +__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp, + const char *file, unsigned int line, + const char *fmt, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +int ucall_nr_pages_required(uint64_t page_size); /* * Perform userspace call without any associated data. This bare call avoids @@ -46,8 +53,22 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_SYNC1(arg0) ucall(UCALL_SYNC, 1, arg0) +#define GUEST_SYNC2(arg0, arg1) ucall(UCALL_SYNC, 2, arg0, arg1) +#define GUEST_SYNC3(arg0, arg1, arg2) \ + ucall(UCALL_SYNC, 3, arg0, arg1, arg2) +#define GUEST_SYNC4(arg0, arg1, arg2, arg3) \ + ucall(UCALL_SYNC, 4, arg0, arg1, arg2, arg3) +#define GUEST_SYNC5(arg0, arg1, arg2, arg3, arg4) \ + ucall(UCALL_SYNC, 5, arg0, arg1, arg2, arg3, arg4) +#define GUEST_SYNC6(arg0, arg1, arg2, arg3, arg4, arg5) \ + ucall(UCALL_SYNC, 6, arg0, arg1, arg2, arg3, arg4, arg5) + +#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args) #define GUEST_DONE() ucall(UCALL_DONE, 0) +#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer) + enum guest_assert_builtin_args { GUEST_ERROR_STRING, GUEST_FILE, @@ -55,70 +76,41 @@ enum guest_assert_builtin_args { GUEST_ASSERT_BUILTIN_NARGS }; -#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) \ -do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \ - "Failed guest assert: " _condstr, \ - __FILE__, __LINE__, ##_args); \ +#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...) \ +do { \ + if (!(_condition)) \ + ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args); \ } while (0) -#define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT(_condition, #_condition, 0, 0) - -#define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) - -#define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) - -#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) - -#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) - -#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) +#define __GUEST_ASSERT(_condition, _fmt, _args...) \ + ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args) -#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...) \ - TEST_FAIL("%s at %s:%ld\n" fmt, \ - (const char *)(_ucall).args[GUEST_ERROR_STRING], \ - (const char *)(_ucall).args[GUEST_FILE], \ - (_ucall).args[GUEST_LINE], \ - ##_args) +#define GUEST_ASSERT(_condition) \ + __GUEST_ASSERT(_condition, #_condition) -#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i]) +#define GUEST_FAIL(_fmt, _args...) \ + ucall_assert(UCALL_ABORT, "Unconditional guest failure", \ + __FILE__, __LINE__, _fmt, ##_args) -#define REPORT_GUEST_ASSERT(ucall) \ - __REPORT_GUEST_ASSERT((ucall), "") - -#define REPORT_GUEST_ASSERT_1(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0)) - -#define REPORT_GUEST_ASSERT_2(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1)) - -#define REPORT_GUEST_ASSERT_3(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1), \ - GUEST_ASSERT_ARG((ucall), 2)) +#define GUEST_ASSERT_EQ(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b); \ +} while (0) -#define REPORT_GUEST_ASSERT_4(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1), \ - GUEST_ASSERT_ARG((ucall), 2), \ - GUEST_ASSERT_ARG((ucall), 3)) +#define GUEST_ASSERT_NE(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b); \ +} while (0) -#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...) \ - __REPORT_GUEST_ASSERT((ucall), fmt, ##args) +#define REPORT_GUEST_ASSERT(ucall) \ + test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING], \ + (const char *)(ucall).args[GUEST_FILE], \ + (ucall).args[GUEST_LINE], "%s", (ucall).buffer) #endif /* SELFTEST_KVM_UCALL_COMMON_H */ diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h index 877449c34592..60f7f9d435dc 100644 --- a/tools/testing/selftests/kvm/include/userfaultfd_util.h +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -5,9 +5,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019-2022 Google LLC */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <time.h> #include <pthread.h> @@ -17,17 +14,27 @@ typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); -struct uffd_desc { +struct uffd_reader_args { int uffd_mode; int uffd; - int pipefds[2]; useconds_t delay; uffd_handler_t handler; - pthread_t thread; + /* Holds the read end of the pipe for killing the reader. */ + int pipe; +}; + +struct uffd_desc { + int uffd; + uint64_t num_readers; + /* Holds the write ends of the pipes for killing the readers. */ + int *pipefds; + pthread_t *readers; + struct uffd_reader_args *reader_args; }; struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void *hva, uint64_t len, + uint64_t num_readers, uffd_handler_t handler); void uffd_stop_demand_paging(struct uffd_desc *uffd); diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index bed316fdecd5..80fe9f69b38d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/apic.h - * * Copyright (C) 2021, Google LLC. */ @@ -11,6 +9,7 @@ #include <stdint.h> #include "processor.h" +#include "ucall_common.h" #define APIC_DEFAULT_GPA 0xfee00000ULL @@ -60,6 +59,14 @@ #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 #define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 void apic_disable(void); void xapic_enable(void); @@ -85,9 +92,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg) return rdmsr(APIC_BASE_MSR + (reg >> 4)); } +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +{ + return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); +} + static inline void x2apic_write_reg(unsigned int reg, uint64_t value) { - wrmsr(APIC_BASE_MSR + (reg >> 4), value); + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", + fault, APIC_BASE_MSR + (reg >> 4), value); } +static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(fault == GP_VECTOR, + "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", + APIC_BASE_MSR + (reg >> 4), value, fault); +} + + #endif /* SELFTEST_KVM_APIC_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 901caf0e0939..5a74bb30e2f8 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/evmcs.h - * * Copyright (C) 2018, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_EVMCS_H diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index fa65b908b13e..f13e532be240 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -1,9 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * tools/testing/selftests/kvm/include/x86_64/hyperv.h - * * Copyright (C) 2021, Red Hat, Inc. - * */ #ifndef SELFTEST_KVM_HYPERV_H @@ -186,6 +183,18 @@ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) +/* HYPERV_CPUID_NESTED_FEATURES.EAX */ +#define HV_X64_NESTED_DIRECT_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) +#define HV_X64_NESTED_MSR_BITMAP \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) + +/* HYPERV_CPUID_NESTED_FEATURES.EBX */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) + /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ #define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) @@ -343,4 +352,10 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h new file mode 100644 index 000000000000..972bb1c4ab4c --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +#include <stdbool.h> +#include <stdint.h> + +#include "kvm_util_types.h" +#include "test_util.h" + +extern bool is_forced_emulation_enabled; + +struct kvm_vm_arch { + vm_vaddr_t gdt; + vm_vaddr_t tss; + vm_vaddr_t idt; + + uint64_t c_bit; + uint64_t s_bit; + int sev_fd; + bool is_pt_protected; +}; + +static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch) +{ + return arch->c_bit || arch->s_bit; +} + +#define vm_arch_has_protected_memory(vm) \ + __vm_arch_has_protected_memory(&(vm)->arch) + +#define vcpu_arch_put_guest(mem, __val) \ +do { \ + const typeof(mem) val = (__val); \ + \ + if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \ + (mem) = val; \ + } else if (guest_random_bool(&guest_rng)) { \ + __asm__ __volatile__(KVM_FEP "mov %1, %0" \ + : "+m" (mem) \ + : "r" (val) : "memory"); \ + } else { \ + uint64_t __old = READ_ONCE(mem); \ + \ + __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ + : [ptr] "+m" (mem), [old] "+a" (__old) \ + : [new]"r" (val) : "memory", "cc"); \ + } \ +} while (0) + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h index 6119321f3f5d..295f2d554754 100644 --- a/tools/testing/selftests/kvm/include/x86_64/mce.h +++ b/tools/testing/selftests/kvm/include/x86/mce.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/mce.h - * * Copyright (C) 2022, Google LLC. */ diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h new file mode 100644 index 000000000000..72575eadb63a --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023, Tencent, Inc. + */ +#ifndef SELFTEST_KVM_PMU_H +#define SELFTEST_KVM_PMU_H + +#include <stdbool.h> +#include <stdint.h> + +#include <linux/bits.h> + +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 + +/* + * Encode an eventsel+umask pair into event-select MSR format. Note, this is + * technically AMD's format, as Intel's format only supports 8 bits for the + * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing + * in '0' is a nop and won't clobber the CMASK. + */ +#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \ + ((eventsel) & 0xff) | \ + ((umask) & 0xff) << 8) + +/* + * These are technically Intel's definitions, but except for CMASK (see above), + * AMD's layout is compatible with Intel's. + */ +#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0) +#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8) +#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16) +#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17) +#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18) +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19) +#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20) +#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21) +#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22) +#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23) +#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24) + +/* RDPMC control flags, Intel only. */ +#define INTEL_RDPMC_METRICS BIT_ULL(29) +#define INTEL_RDPMC_FIXED BIT_ULL(30) +#define INTEL_RDPMC_FAST BIT_ULL(31) + +/* Fixed PMC controls, Intel only. */ +#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx))) + +#define FIXED_PMC_KERNEL BIT_ULL(0) +#define FIXED_PMC_USER BIT_ULL(1) +#define FIXED_PMC_ANYTHREAD BIT_ULL(2) +#define FIXED_PMC_ENABLE_PMI BIT_ULL(3) +#define FIXED_PMC_NR_BITS 4 +#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS)) + +#define PMU_CAP_FW_WRITES BIT_ULL(13) +#define PMU_CAP_LBR_FMT 0x3f + +#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00) +#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) +#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01) +#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f) +#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41) +#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) +#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) +#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) +#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02) +#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00) +#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01) +#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02) +#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01) + +#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) +#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) +#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00) +#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00) + +/* + * Note! The order and thus the index of the architectural events matters as + * support for each event is enumerated via CPUID using the index of the event. + */ +enum intel_pmu_architectural_events { + INTEL_ARCH_CPU_CYCLES_INDEX, + INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX, + INTEL_ARCH_REFERENCE_CYCLES_INDEX, + INTEL_ARCH_LLC_REFERENCES_INDEX, + INTEL_ARCH_LLC_MISSES_INDEX, + INTEL_ARCH_BRANCHES_RETIRED_INDEX, + INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, + INTEL_ARCH_TOPDOWN_SLOTS_INDEX, + INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX, + INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_RETIRING_INDEX, + INTEL_ARCH_LBR_INSERTS_INDEX, + NR_INTEL_ARCH_EVENTS, +}; + +enum amd_pmu_zen_events { + AMD_ZEN_CORE_CYCLES_INDEX, + AMD_ZEN_INSTRUCTIONS_INDEX, + AMD_ZEN_BRANCHES_INDEX, + AMD_ZEN_BRANCH_MISSES_INDEX, + NR_AMD_ZEN_EVENTS, +}; + +extern const uint64_t intel_pmu_arch_events[]; +extern const uint64_t amd_pmu_zen_events[]; + +enum pmu_errata { + INSTRUCTIONS_RETIRED_OVERCOUNT, + BRANCHES_RETIRED_OVERCOUNT, +}; +extern uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void); + +static inline bool this_pmu_has_errata(enum pmu_errata errata) +{ + return pmu_errata_mask & BIT_ULL(errata); +} + +#endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index aa434c8f19c5..57d62a425109 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/processor.h - * * Copyright (C) 2018, Google LLC. */ @@ -15,15 +13,29 @@ #include <asm/msr-index.h> #include <asm/prctl.h> +#include <linux/kvm_para.h> #include <linux/stringify.h> -#include "../kvm_util.h" +#include "kvm_util.h" +#include "ucall_common.h" extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; +extern uint64_t guest_tsc_khz; + +#ifndef MAX_NR_CPUID_ENTRIES +#define MAX_NR_CPUID_ENTRIES 100 +#endif + +#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull + +/* Forced emulation prefix, used to invoke the emulator unconditionally. */ +#define KVM_FEP "ud2; .byte 'k', 'v', 'm';" #define NMI_VECTOR 0x02 +const char *ex_str(int vector); + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -68,6 +80,12 @@ struct xstate { #define XFEATURE_MASK_OPMASK BIT_ULL(5) #define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) #define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) #define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) #define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) @@ -147,6 +165,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) #define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) #define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) @@ -166,6 +185,9 @@ struct kvm_x86_cpu_feature { * Extended Leafs, a.k.a. AMD defined */ #define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23) +#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24) +#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28) #define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) #define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) #define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) @@ -180,8 +202,12 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) #define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) +#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4) +#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0) +#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2) /* * KVM defined paravirt features. @@ -239,7 +265,12 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) #define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) +#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) #define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31) #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) @@ -260,7 +291,11 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) #define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) +#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) +#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3) +#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15) #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) @@ -269,24 +304,46 @@ struct kvm_x86_cpu_property { * that indicates the feature is _not_ supported, and a property that states * the length of the bit mask of unsupported features. A feature is supported * if the size of the bit mask is larger than the "unavailable" bit, and said - * bit is not set. + * bit is not set. Fixed counters also bizarre enumeration, but inverted from + * arch events for general purpose counters. Fixed counters are supported if a + * feature flag is set **OR** the total number of fixed counters is greater + * than index of the counter. * - * Wrap the "unavailable" feature to simplify checking whether or not a given - * architectural event is supported. + * Wrap the events for general purpose and fixed counters to simplify checking + * whether or not a given architectural event is supported. */ struct kvm_x86_pmu_feature { - struct kvm_x86_cpu_feature anti_feature; + struct kvm_x86_cpu_feature f; }; -#define KVM_X86_PMU_FEATURE(name, __bit) \ -({ \ - struct kvm_x86_pmu_feature feature = { \ - .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit), \ - }; \ - \ - feature; \ +#define KVM_X86_PMU_FEATURE(__reg, __bit) \ +({ \ + struct kvm_x86_pmu_feature feature = { \ + .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \ + }; \ + \ + kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \ + KVM_CPUID_##__reg == KVM_CPUID_ECX); \ + feature; \ }) -#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5) +#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0) +#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1) +#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2) +#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3) +#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4) +#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) +#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) +#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) +#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8) +#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9) +#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10) +#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11) +#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12) + +#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) +#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) +#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2) +#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3) static inline unsigned int x86_family(unsigned int eax) { @@ -530,6 +587,11 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline void set_idt(const struct desc_ptr *idt_desc) +{ + __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); +} + static inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -548,6 +610,13 @@ static inline void xsetbv(u32 index, u64 value) __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; @@ -678,10 +747,16 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) { - uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + uint32_t nr_bits; - return nr_bits > feature.anti_feature.bit && - !this_cpu_has(feature.anti_feature); + if (feature.f.reg == KVM_CPUID_EBX) { + nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + return nr_bits > feature.f.bit && !this_cpu_has(feature.f); + } + + GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX); + nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + return nr_bits > feature.f.bit || this_cpu_has(feature.f); } static __always_inline uint64_t this_cpu_supported_xcr0(void) @@ -767,6 +842,23 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } +static inline void udelay(unsigned long usec) +{ + uint64_t start, now, cycles; + + GUEST_ASSERT(guest_tsc_khz); + cycles = guest_tsc_khz / 1000 * usec; + + /* + * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is + * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits. + */ + start = rdtsc(); + do { + now = rdtsc(); + } while (now - start < cycles); +} + #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ @@ -841,8 +933,6 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); static inline uint32_t kvm_cpu_fms(void) { @@ -897,10 +987,25 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) { - uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + uint32_t nr_bits; + + if (feature.f.reg == KVM_CPUID_EBX) { + nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); + return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f); + } + + TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX); + nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + return nr_bits > feature.f.bit || kvm_cpu_has(feature.f); +} + +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; - return nr_bits > feature.anti_feature.bit && - !kvm_cpu_has(feature.anti_feature); + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); } static inline size_t kvm_cpuid2_size(int nr_entries) @@ -927,12 +1032,20 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) } void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, uint32_t index) { + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); + + vcpu_get_cpuid(vcpu); + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, function, index); } @@ -953,7 +1066,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) return r; /* On success, refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); return 0; } @@ -963,12 +1076,25 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); /* Refresh the cache to pick up adjustments made by KVM. */ - vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + vcpu_get_cpuid(vcpu); } +void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_property property, + uint32_t value); void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); + +static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + struct kvm_cpuid_entry2 *entry; + + entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); + return *((&entry->eax) + feature.reg) & BIT(feature.bit); +} + void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature, bool set); @@ -1030,7 +1156,7 @@ do { \ } while (0) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); +void kvm_init_vm_address_properties(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -1057,11 +1183,15 @@ struct idt_entry { uint32_t offset2; uint32_t reserved; }; -void vm_init_descriptor_tables(struct kvm_vm *vm); -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be + * used to signal "no expcetion". + */ +#define KVM_MAGIC_DE_VECTOR 0xff + /* If a toddler were to say "abracadabra". */ #define KVM_EXCEPTION_MAGIC 0xabacadabaULL @@ -1091,16 +1221,19 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, * r9 = exception vector (non-zero) * r10 = error code */ -#define KVM_ASM_SAFE(insn) \ +#define __KVM_ASM_SAFE(insn, fep) \ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ "lea 1f(%%rip), %%r10\n\t" \ "lea 2f(%%rip), %%r11\n\t" \ - "1: " insn "\n\t" \ + fep "1: " insn "\n\t" \ "xor %%r9, %%r9\n\t" \ "2:\n\t" \ "mov %%r9b, %[vector]\n\t" \ "mov %%r10, %[error_code]\n\t" +#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "") +#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP) + #define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec) #define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" @@ -1127,21 +1260,58 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, vector; \ }) -static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val) -{ - uint64_t error_code; - uint8_t vector; - uint32_t a, d; +#define kvm_asm_safe_fep(insn, inputs...) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) - asm volatile(KVM_ASM_SAFE("rdmsr") - : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code) - : "c"(msr) - : KVM_ASM_SAFE_CLOBBERS); +#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) - *val = (uint64_t)a | ((uint64_t)d << 32); - return vector; +#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ +static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ +{ \ + uint64_t error_code; \ + uint8_t vector; \ + uint32_t a, d; \ + \ + asm volatile(KVM_ASM_SAFE##_FEP(#insn) \ + : "=a"(a), "=d"(d), \ + KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ + : "c"(idx) \ + : KVM_ASM_SAFE_CLOBBERS); \ + \ + *val = (uint64_t)a | ((uint64_t)d << 32); \ + return vector; \ } +/* + * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that + * use ECX as in input index, and EDX:EAX as a 64-bit output. + */ +#define BUILD_READ_U64_SAFE_HELPERS(insn) \ + BUILD_READ_U64_SAFE_HELPER(insn, , ) \ + BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ + +BUILD_READ_U64_SAFE_HELPERS(rdmsr) +BUILD_READ_U64_SAFE_HELPERS(rdpmc) +BUILD_READ_U64_SAFE_HELPERS(xgetbv) + static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) { return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); @@ -1157,6 +1327,46 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) bool kvm_is_tdp_enabled(void); +static inline bool get_kvm_intel_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_intel", param); +} + +static inline bool get_kvm_amd_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_amd", param); +} + +static inline int get_kvm_intel_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_intel", param); +} + +static inline int get_kvm_amd_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_amd", param); +} + +static inline bool kvm_is_pmu_enabled(void) +{ + return get_kvm_param_bool("enable_pmu"); +} + +static inline bool kvm_is_forced_emulation_enabled(void) +{ + return !!get_kvm_param_integer("force_emulation_prefix"); +} + +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + +static inline bool kvm_is_ignore_msrs(void) +{ + return get_kvm_param_bool("ignore_msrs"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); @@ -1166,6 +1376,60 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); +static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, + uint64_t size, uint64_t flags) +{ + return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); +} + +static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, + uint64_t flags) +{ + uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); + + GUEST_ASSERT(!ret); +} + +/* + * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's + * intended to be a wake event arrives *after* HLT is executed. Modern CPUs, + * except for a few oddballs that KVM is unlikely to run on, block IRQs for one + * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may + * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too. + */ +static inline void safe_halt(void) +{ + asm volatile("sti; hlt"); +} + +/* + * Enable interrupts and ensure that interrupts are evaluated upon return from + * this function, i.e. execute a nop to consume the STi interrupt shadow. + */ +static inline void sti_nop(void) +{ + asm volatile ("sti; nop"); +} + +/* + * Enable interrupts for one instruction (nop), to allow the CPU to process all + * interrupts that are already pending. + */ +static inline void sti_nop_cli(void) +{ + asm volatile ("sti; nop; cli"); +} + +static inline void sti(void) +{ + asm volatile("sti"); +} + +static inline void cli(void) +{ + asm volatile ("cli"); +} + void __vm_xsave_require_permission(uint64_t xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ @@ -1177,7 +1441,7 @@ enum pg_level { PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, - PG_LEVEL_NUM + PG_LEVEL_256T }; #define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) @@ -1228,4 +1492,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +bool sys_clocksource_is_based_on_tsc(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h new file mode 100644 index 000000000000..008b4169f5e2 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Helpers used for SEV guests + * + */ +#ifndef SELFTEST_KVM_SEV_H +#define SELFTEST_KVM_SEV_H + +#include <stdint.h> +#include <stdbool.h> + +#include "linux/psp-sev.h" + +#include "kvm_util.h" +#include "svm_util.h" +#include "processor.h" + +enum sev_guest_state { + SEV_GUEST_STATE_UNINITIALIZED = 0, + SEV_GUEST_STATE_LAUNCH_UPDATE, + SEV_GUEST_STATE_LAUNCH_SECRET, + SEV_GUEST_STATE_RUNNING, +}; + +#define SEV_POLICY_NO_DBG (1UL << 0) +#define SEV_POLICY_ES (1UL << 2) + +#define SNP_POLICY_SMT (1ULL << 16) +#define SNP_POLICY_RSVD_MBO (1ULL << 17) +#define SNP_POLICY_DBG (1ULL << 19) + +#define GHCB_MSR_TERM_REQ 0x100 + +static inline bool is_sev_snp_vm(struct kvm_vm *vm) +{ + return vm->type == KVM_X86_SNP_VM; +} + +static inline bool is_sev_es_vm(struct kvm_vm *vm) +{ + return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM; +} + +static inline bool is_sev_vm(struct kvm_vm *vm) +{ + return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM; +} + +void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); +void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); +void sev_vm_launch_finish(struct kvm_vm *vm); +void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy); +void snp_vm_launch_update(struct kvm_vm *vm); +void snp_vm_launch_finish(struct kvm_vm *vm); + +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, + struct kvm_vcpu **cpu); +void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement); + +kvm_static_assert(SEV_RET_SUCCESS == 0); + +/* + * A SEV-SNP VM requires the policy reserved bit to always be set. + * The SMT policy bit is also required to be set based on SMT being + * available and active on the system. + */ +static inline u64 snp_default_policy(void) +{ + return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0); +} + +/* + * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long" + * instead of a proper struct. The size of the parameter is embedded in the + * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by + * creating an overlay to pass in an "unsigned long" without a cast (casting + * will make the compiler unhappy due to dereferencing an aliased pointer). + */ +#define __vm_sev_ioctl(vm, cmd, arg) \ +({ \ + int r; \ + \ + union { \ + struct kvm_sev_cmd c; \ + unsigned long raw; \ + } sev_cmd = { .c = { \ + .id = (cmd), \ + .data = (uint64_t)(arg), \ + .sev_fd = (vm)->arch.sev_fd, \ + } }; \ + \ + r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \ + r ?: sev_cmd.c.error; \ +}) + +#define vm_sev_ioctl(vm, cmd, arg) \ +({ \ + int ret = __vm_sev_ioctl(vm, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ +}) + +void sev_vm_init(struct kvm_vm *vm); +void sev_es_vm_init(struct kvm_vm *vm); +void snp_vm_init(struct kvm_vm *vm); + +static inline void vmgexit(void) +{ + __asm__ __volatile__("rep; vmmcall"); +} + +static inline void sev_register_encrypted_memory(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + struct kvm_enc_region range = { + .addr = region->region.userspace_addr, + .size = region->region.memory_size, + }; + + vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range); +} + +static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, + uint64_t size) +{ + struct kvm_sev_launch_update_data update_data = { + .uaddr = (unsigned long)addr_gpa2hva(vm, gpa), + .len = size, + }; + + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); +} + +static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, + uint64_t hva, uint64_t size, uint8_t type) +{ + struct kvm_sev_snp_launch_update update_data = { + .uaddr = hva, + .gfn_start = gpa >> PAGE_SHIFT, + .len = size, + .type = type, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data); +} + +#endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h index 4803e1056055..29cffd0a9181 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86/svm.h @@ -1,10 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * tools/testing/selftests/kvm/include/x86_64/svm.h - * This is a copy of arch/x86/include/asm/svm.h - * - */ - #ifndef SELFTEST_KVM_SVM_H #define SELFTEST_KVM_SVM_H diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index 044f0f872ba9..b74c6dcddcbd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -1,8 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/svm_utils.h - * Header for nested SVM testing - * * Copyright (C) 2020, Red Hat, Inc. */ diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h new file mode 100644 index 000000000000..d3825dcc3cd9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/ucall.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_IO + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +#endif diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 5f0c0a29c556..96e2b4c630a9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/x86_64/vmx.h - * * Copyright (C) 2018, Google LLC. */ @@ -570,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..5d7590d01868 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the safe fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. + */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. */ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. + */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + struct kvm_vcpu *unused; + int r, i; + + TEST_REQUIRE(kvm_arch_has_default_irqchip()); + + /* + * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also + * create an unused vCPU as certain architectures (like arm64) need to + * complete IRQ chip initialization after all possible vCPUs for a VM + * have been created. + */ + vm1 = vm_create_with_one_vcpu(&unused, NULL); + vm2 = vm_create_with_one_vcpu(&unused, NULL); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. + */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index 698c1cfa3111..b7dbde9c0843 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -6,8 +6,6 @@ * * Test the fd-based interface for KVM statistics. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -241,14 +239,14 @@ int main(int argc, char *argv[]) * single stats file works and doesn't cause explosions. */ vm_stats_fds = vm_get_stats_fd(vms[i]); - stats_test(dup(vm_stats_fds)); + stats_test(kvm_dup(vm_stats_fds)); /* Verify userspace can instantiate multiple stats files. */ stats_test(vm_get_stats_fd(vms[i])); for (j = 0; j < max_vcpu; ++j) { vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]); - stats_test(dup(vcpu_stats_fds[j])); + stats_test(kvm_dup(vcpu_stats_fds[j])); stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j])); } diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 31b3cb24b9a7..c5310736ed06 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -6,13 +6,10 @@ * * Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include "test_util.h" @@ -41,36 +38,11 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - /* - * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + - * an arbitrary number for everything else. - */ - int nr_fds_wanted = kvm_max_vcpus + 100; - struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); - /* - * Check that we're allowed to open nr_fds_wanted file descriptors and - * try raising the limits if needed. - */ - TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); - - if (rl.rlim_cur < nr_fds_wanted) { - rl.rlim_cur = nr_fds_wanted; - if (rl.rlim_max < nr_fds_wanted) { - int old_rlim_max = rl.rlim_max; - rl.rlim_max = nr_fds_wanted; - - int r = setrlimit(RLIMIT_NOFILE, &rl); - __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", - old_rlim_max, nr_fds_wanted); - } else { - TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); - } - } + kvm_set_files_rlimit(kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index b3b00be1ef82..dd8b12f626d3 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -8,9 +8,6 @@ * page size have been pre-allocated on your system, if you are planning to * use hugepages to back the guest memory for testing. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <time.h> @@ -21,6 +18,7 @@ #include "kvm_util.h" #include "processor.h" #include "guest_modes.h" +#include "ucall_common.h" #define TEST_MEM_SLOT_INDEX 1 @@ -200,13 +198,13 @@ static void *vcpu_worker(void *data) if (READ_ONCE(host_quit)) return NULL; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(vcpu->run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu->id); @@ -254,7 +252,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) /* Create a VM with enough guest pages */ guest_num_pages = test_mem_size / guest_page_size; - vm = __vm_create_with_vcpus(mode, nr_vcpus, guest_num_pages, + vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages, guest_code, test_args.vcpus); /* Align down GPA of the testing memslot */ @@ -367,7 +365,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Test the stage of KVM creating mappings */ *current_stage = KVM_CREATE_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); @@ -380,7 +378,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) *current_stage = KVM_UPDATE_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); @@ -392,7 +390,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) *current_stage = KVM_ADJUST_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 55668631d546..b023868fe0b8 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -17,13 +17,12 @@ static const struct gic_common_ops *gic_common_ops; static struct spinlock gic_lock; -static void gic_cpu_init(unsigned int cpu, void *redist_base) +static void gic_cpu_init(unsigned int cpu) { - gic_common_ops->gic_cpu_init(cpu, redist_base); + gic_common_ops->gic_cpu_init(cpu); } -static void -gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) +static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) { const struct gic_common_ops *gic_ops = NULL; @@ -40,7 +39,7 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) GUEST_ASSERT(gic_ops); - gic_ops->gic_init(nr_cpus, dist_base); + gic_ops->gic_init(nr_cpus); gic_common_ops = gic_ops; /* Make sure that the initialized data is visible to all the vCPUs */ @@ -49,18 +48,15 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) spin_unlock(&gic_lock); } -void gic_init(enum gic_type type, unsigned int nr_cpus, - void *dist_base, void *redist_base) +void gic_init(enum gic_type type, unsigned int nr_cpus) { uint32_t cpu = guest_get_vcpuid(); GUEST_ASSERT(type < GIC_TYPE_MAX); - GUEST_ASSERT(dist_base); - GUEST_ASSERT(redist_base); GUEST_ASSERT(nr_cpus); - gic_dist_init(type, nr_cpus, dist_base); - gic_cpu_init(cpu, redist_base); + gic_dist_init(type, nr_cpus); + gic_cpu_init(cpu); } void gic_irq_enable(unsigned int intid) @@ -159,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge) GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_irq_set_config(intid, is_edge); } + +void gic_irq_set_group(unsigned int intid, bool group) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_group(intid, group); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index 75d07313c893..b6a7e30c3eb1 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -8,8 +8,8 @@ #define SELFTEST_KVM_GIC_PRIVATE_H struct gic_common_ops { - void (*gic_init)(unsigned int nr_cpus, void *dist_base); - void (*gic_cpu_init)(unsigned int cpu, void *redist_base); + void (*gic_init)(unsigned int nr_cpus); + void (*gic_cpu_init)(unsigned int cpu); void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); uint64_t (*gic_read_iar)(void); @@ -25,6 +25,7 @@ struct gic_common_ops { void (*gic_irq_clear_pending)(uint32_t intid); bool (*gic_irq_get_pending)(uint32_t intid); void (*gic_irq_set_config)(uint32_t intid, bool is_edge); + void (*gic_irq_set_group)(uint32_t intid, bool group); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 263bf3ed8fd5..50754a27f493 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -9,12 +9,21 @@ #include "processor.h" #include "delay.h" +#include "gic.h" #include "gic_v3.h" #include "gic_private.h" +#define GICV3_MAX_CPUS 512 + +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + +#define ICC_PMR_DEF_PRIO 0xf0 + struct gicv3_data { - void *dist_base; - void *redist_base[GICV3_MAX_CPUS]; unsigned int nr_cpus; unsigned int nr_spis; }; @@ -35,17 +44,23 @@ static void gicv3_gicd_wait_for_rwp(void) { unsigned int count = 100000; /* 1s */ - while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) { GUEST_ASSERT(count--); udelay(10); } } -static void gicv3_gicr_wait_for_rwp(void *redist_base) +static inline volatile void *gicr_base_cpu(uint32_t cpu) +{ + /* Align all the redistributors sequentially */ + return GICR_BASE_GVA + cpu * SZ_64K * 2; +} + +static void gicv3_gicr_wait_for_rwp(uint32_t cpu) { unsigned int count = 100000; /* 1s */ - while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) { + while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) { GUEST_ASSERT(count--); udelay(10); } @@ -56,7 +71,7 @@ static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) if (cpu_or_dist & DIST_BIT) gicv3_gicd_wait_for_rwp(); else - gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]); + gicv3_gicr_wait_for_rwp(cpu_or_dist); } static enum gicv3_intid_range get_intid_range(unsigned int intid) @@ -116,15 +131,15 @@ static void gicv3_set_eoi_split(bool split) uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) { - void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base - : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) { - void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base - : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } @@ -263,7 +278,7 @@ static bool gicv3_irq_get_pending(uint32_t intid) return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } -static void gicv3_enable_redist(void *redist_base) +static void gicv3_enable_redist(volatile void *redist_base) { uint32_t val = readl(redist_base + GICR_WAKER); unsigned int count = 100000; /* 1s */ @@ -278,23 +293,36 @@ static void gicv3_enable_redist(void *redist_base) } } -static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu) +static void gicv3_set_group(uint32_t intid, bool grp) { - /* Align all the redistributors sequentially */ - return redist_base + cpu * SZ_64K * 2; + uint32_t cpu_or_dist; + uint32_t val; + + cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid(); + val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); + if (grp) + val |= BIT(intid % 32); + else + val &= ~BIT(intid % 32); + gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val); } -static void gicv3_cpu_init(unsigned int cpu, void *redist_base) +static void gicv3_cpu_init(unsigned int cpu) { - void *sgi_base; + volatile void *sgi_base; unsigned int i; - void *redist_base_cpu; + volatile void *redist_base_cpu; + u64 typer; GUEST_ASSERT(cpu < gicv3_data.nr_cpus); - redist_base_cpu = gicr_base_cpu(redist_base, cpu); + redist_base_cpu = gicr_base_cpu(cpu); sgi_base = sgi_base_from_redist(redist_base_cpu); + /* Verify assumption that GICR_TYPER.Processor_number == cpu */ + typer = readq_relaxed(redist_base_cpu + GICR_TYPER); + GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu); + gicv3_enable_redist(redist_base_cpu); /* @@ -310,7 +338,7 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base) writel(GICD_INT_DEF_PRI_X4, sgi_base + GICR_IPRIORITYR0 + i); - gicv3_gicr_wait_for_rwp(redist_base_cpu); + gicv3_gicr_wait_for_rwp(cpu); /* Enable the GIC system register (ICC_*) access */ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE, @@ -319,19 +347,18 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base) /* Set a default priority threshold */ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); + /* Disable Group-0 interrupts */ + write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1); /* Enable non-secure Group-1 interrupts */ - write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1); - - gicv3_data.redist_base[cpu] = redist_base_cpu; + write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); } static void gicv3_dist_init(void) { - void *dist_base = gicv3_data.dist_base; unsigned int i; /* Disable the distributor until we set things up */ - writel(0, dist_base + GICD_CTLR); + writel(0, GICD_BASE_GVA + GICD_CTLR); gicv3_gicd_wait_for_rwp(); /* @@ -339,33 +366,32 @@ static void gicv3_dist_init(void) * Also, deactivate and disable them. */ for (i = 32; i < gicv3_data.nr_spis; i += 32) { - writel(~0, dist_base + GICD_IGROUPR + i / 8); - writel(~0, dist_base + GICD_ICACTIVER + i / 8); - writel(~0, dist_base + GICD_ICENABLER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8); } /* Set a default priority for all the SPIs */ for (i = 32; i < gicv3_data.nr_spis; i += 4) writel(GICD_INT_DEF_PRI_X4, - dist_base + GICD_IPRIORITYR + i); + GICD_BASE_GVA + GICD_IPRIORITYR + i); /* Wait for the settings to sync-in */ gicv3_gicd_wait_for_rwp(); /* Finally, enable the distributor globally with ARE */ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | - GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR); + GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR); gicv3_gicd_wait_for_rwp(); } -static void gicv3_init(unsigned int nr_cpus, void *dist_base) +static void gicv3_init(unsigned int nr_cpus) { GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS); gicv3_data.nr_cpus = nr_cpus; - gicv3_data.dist_base = dist_base; gicv3_data.nr_spis = GICD_TYPER_SPIS( - readl(gicv3_data.dist_base + GICD_TYPER)); + readl(GICD_BASE_GVA + GICD_TYPER)); if (gicv3_data.nr_spis > 1020) gicv3_data.nr_spis = 1020; @@ -395,4 +421,29 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_clear_pending = gicv3_irq_clear_pending, .gic_irq_get_pending = gicv3_irq_get_pending, .gic_irq_set_config = gicv3_irq_set_config, + .gic_irq_set_group = gicv3_set_group, }; + +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table) +{ + volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); + + u32 ctlr; + u64 val; + + val = (cfg_table | + GICR_PROPBASER_InnerShareable | + GICR_PROPBASER_RaWaWb | + ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK)); + writeq_relaxed(val, rdist_base + GICR_PROPBASER); + + val = (pend_table | + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_RaWaWb); + writeq_relaxed(val, rdist_base + GICR_PENDBASER); + + ctlr = readl_relaxed(rdist_base + GICR_CTLR); + ctlr |= GICR_CTLR_ENABLE_LPIS; + writel_relaxed(ctlr, rdist_base + GICR_CTLR); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c new file mode 100644 index 000000000000..7f9fdcf42ae6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c + * over in the kernel tree. + */ + +#include <linux/kvm.h> +#include <linux/sizes.h> +#include <asm/kvm_para.h> +#include <asm/kvm.h> + +#include "kvm_util.h" +#include "vgic.h" +#include "gic.h" +#include "gic_v3.h" +#include "processor.h" + +#define GITS_COLLECTION_TARGET_SHIFT 16 + +static u64 its_read_u64(unsigned long offset) +{ + return readq_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u64(unsigned long offset, u64 val) +{ + writeq_relaxed(val, GITS_BASE_GVA + offset); +} + +static u32 its_read_u32(unsigned long offset) +{ + return readl_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u32(unsigned long offset, u32 val) +{ + writel_relaxed(val, GITS_BASE_GVA + offset); +} + +static unsigned long its_find_baser(unsigned int type) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + u64 baser; + unsigned long offset = GITS_BASER + (i * sizeof(baser)); + + baser = its_read_u64(offset); + if (GITS_BASER_TYPE(baser) == type) + return offset; + } + + GUEST_FAIL("Couldn't find an ITS BASER of type %u", type); + return -1; +} + +static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +{ + unsigned long offset = its_find_baser(type); + u64 baser; + + baser = ((size / SZ_64K) - 1) | + GITS_BASER_PAGE_SIZE_64K | + GITS_BASER_InnerShareable | + base | + GITS_BASER_RaWaWb | + GITS_BASER_VALID; + + its_write_u64(offset, baser); +} + +static void its_install_cmdq(vm_paddr_t base, size_t size) +{ + u64 cbaser; + + cbaser = ((size / SZ_4K) - 1) | + GITS_CBASER_InnerShareable | + base | + GITS_CBASER_RaWaWb | + GITS_CBASER_VALID; + + its_write_u64(GITS_CBASER, cbaser); +} + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size) +{ + u32 ctlr; + + its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz); + its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz); + its_install_cmdq(cmdq, cmdq_size); + + ctlr = its_read_u32(GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + its_write_u32(GITS_CTLR, ctlr); +} + +struct its_cmd_block { + union { + u64 raw_cmd[4]; + __le64 raw_cmd_le[4]; + }; +}; + +static inline void its_fixup_cmd(struct its_cmd_block *cmd) +{ + /* Let's fixup BE commands */ + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); +} + +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) +{ + u64 mask = GENMASK_ULL(h, l); + *raw_cmd &= ~mask; + *raw_cmd |= (val << l) & mask; +} + +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) +{ + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); +} + +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) +{ + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); +} + +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) +{ + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); +} + +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) +{ + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); +} + +static void its_encode_size(struct its_cmd_block *cmd, u8 size) +{ + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); +} + +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); +} + +static void its_encode_valid(struct its_cmd_block *cmd, int valid) +{ + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); +} + +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); +} + +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) +{ + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); +} + +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + +#define GITS_CMDQ_POLL_ITERATIONS 0 + +static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) +{ + u64 cwriter = its_read_u64(GITS_CWRITER); + struct its_cmd_block *dst = cmdq_base + cwriter; + u64 cbaser = its_read_u64(GITS_CBASER); + size_t cmdq_size; + u64 next; + int i; + + cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K; + + its_fixup_cmd(cmd); + + WRITE_ONCE(*dst, *cmd); + dsb(ishst); + next = (cwriter + sizeof(*cmd)) % cmdq_size; + its_write_u64(GITS_CWRITER, next); + + /* + * Polling isn't necessary considering KVM's ITS emulation at the time + * of writing this, as the CMDQ is processed synchronously after a write + * to CWRITER. + */ + for (i = 0; its_read_u64(GITS_CREADR) != next; i++) { + __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS, + "ITS didn't process command at offset %lu after %d iterations\n", + cwriter, i); + + cpu_relax(); + } +} + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPD); + its_encode_devid(&cmd, device_id); + its_encode_size(&cmd, ilog2(itt_size) - 1); + its_encode_itt(&cmd, itt_base); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPC); + its_encode_collection(&cmd, collection_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPTI); + its_encode_devid(&cmd, device_id); + its_encode_event_id(&cmd, event_id); + its_encode_phys_id(&cmd, intid); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_invall_cmd(void *cmdq_base, u32 collection_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_INVALL); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_SYNC); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S index 0e443eadfac6..0e443eadfac6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S +++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 3a0259e25335..d46e4b13b92c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -11,7 +11,11 @@ #include "guest_modes.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" +#include "vgic.h" + #include <linux/bitfield.h> +#include <linux/sizes.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -58,13 +62,25 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> vm->page_shift) & mask; } +static inline bool use_lpa2_pte_format(struct kvm_vm *vm) +{ + return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) && + (vm->pa_bits > 48 || vm->va_bits > 48); +} + static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) { uint64_t pte; - pte = pa & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + if (use_lpa2_pte_format(vm)) { + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; + } else { + pte = pa & PTE_ADDR_MASK(vm->page_shift); + if (vm->page_shift == 16) + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; + } pte |= attrs; return pte; @@ -74,9 +90,14 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) { uint64_t pa; - pa = pte & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + if (use_lpa2_pte_format(vm)) { + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; + } else { + pa = pte & PTE_ADDR_MASK(vm->page_shift); + if (vm->page_shift == 16) + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; + } return pa; } @@ -108,7 +129,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -127,18 +149,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -147,7 +172,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -157,7 +186,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) { uint64_t *ptep; @@ -167,17 +196,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -195,6 +230,11 @@ unmapped_gva: exit(EXIT_FAILURE); } +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva); @@ -238,56 +278,73 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +bool vm_supports_el2(struct kvm_vm *vm) +{ + const char *value = getenv("NV"); + + if (value && *value == '0') + return false; + + return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic; +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init preferred = {}; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + if (vm_supports_el2(vm)) + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + + *init = preferred; +} + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; uint64_t sctlr_el1, tcr_el1, ttbr0_el1; - if (!init) + if (!init) { + kvm_get_default_vcpu_target(vm, &default_init); init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; } vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + vcpu->init = *init; /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_P52V48_4K: - TEST_FAIL("AArch64 does not support 4K sized pages " - "with 52-bit physical address ranges"); - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; + case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; + case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -297,55 +354,70 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* Configure output size */ switch (vm->mode) { + case VM_MODE_P52V48_4K: + case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); + if (use_lpa2_pte_format(vm)) + tcr_el1 |= TCR_DS; + + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); + + if (!vcpu_has_el2(vcpu)) + return; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { uint64_t pstate, pc; - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); - vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); + pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); + pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); } -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_vcpu_init *init, void *guest_code) +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); +} + +static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init) { size_t stack_size; uint64_t stack_vaddr; @@ -359,16 +431,23 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size); + return vcpu; +} + +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init, void *guest_code) +{ + struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init); + + vcpu_arch_set_entry_point(vcpu, guest_code); return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { - return aarch64_vcpu_add(vm, vcpu_id, NULL, guest_code); + return __aarch64_vcpu_add(vm, vcpu_id, NULL); } void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) @@ -377,7 +456,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); @@ -415,14 +494,14 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) } struct handlers { - handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; + handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; }; void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -434,7 +513,7 @@ void route_exception(struct ex_regs *regs, int vector) switch (vector) { case VECTOR_SYNC_CURRENT: case VECTOR_SYNC_LOWER_64: - ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(read_sysreg(esr_el1)); valid_ec = true; break; case VECTOR_IRQ_CURRENT: @@ -473,7 +552,7 @@ void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, assert(VECTOR_IS_SYNC(vector)); assert(vector < VECTOR_NUM); - assert(ec < ESR_EC_NUM); + assert(ec <= ESR_ELx_EC_MAX); handlers->exception_handlers[vector][ec] = handler; } @@ -492,12 +571,24 @@ uint32_t guest_get_vcpuid(void) return read_sysreg(tpidr_el1); } -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k) +static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, + uint32_t not_sup_val, uint32_t ipa52_min_val) +{ + if (gran == not_sup_val) + return 0; + else if (gran >= ipa52_min_val && vm_ipa >= 52) + return 52; + else + return min(vm_ipa, 48U); +} + +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k) { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; uint64_t val; + uint32_t gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), .addr = (uint64_t)&val, @@ -518,9 +609,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf; - *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0; - *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0; + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); + *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, + ID_AA64MMFR0_EL1_TGRAN4_52_BIT); + + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); + *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, + ID_AA64MMFR0_EL1_TGRAN64_IMP); + + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); + *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, + ID_AA64MMFR0_EL1_TGRAN16_52_BIT); close(vcpu_fd); close(vm_fd); @@ -584,3 +683,50 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm) sparsebit_set_num(vm->vpages_valid, 0, (1ULL << vm->va_bits) >> vm->page_shift); } + +/* Helper to call wfi instruction. */ +void wfi(void) +{ + asm volatile("wfi"); +} + +static bool request_mte; +static bool request_vgic = true; + +void test_wants_mte(void) +{ + request_mte = true; +} + +void test_disable_default_vgic(void) +{ + request_vgic = false; +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + + if (request_vgic && kvm_supports_vgic_v3()) { + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); + vm->arch.has_gic = true; + } +} + +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + __vgic_v3_init(vm->arch.gic_fd); +} + +void kvm_arch_vm_release(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + close(vm->arch.gic_fd); +} + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c index a076e780be5d..a076e780be5d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c +++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c new file mode 100644 index 000000000000..ddab0ce89d4d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" + +vm_vaddr_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + "Unexpected ucall exit mmio address access"); + return (void *)(*((uint64_t *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index b5f28d21a947..d0f7bd0984b8 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -3,8 +3,10 @@ * ARM Generic Interrupt Controller (GIC) v3 host support */ +#include <linux/kernel.h> #include <linux/kvm.h> #include <linux/sizes.h> +#include <asm/cputype.h> #include <asm/kvm_para.h> #include <asm/kvm.h> @@ -13,14 +15,23 @@ #include "gic.h" #include "gic_v3.h" +bool kvm_supports_vgic_v3(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + kvm_vm_free(vm); + + return !r; +} + /* * vGIC-v3 default host setup * * Input args: * vm - KVM VM * nr_vcpus - Number of vCPUs supported by this VM - * gicd_base_gpa - Guest Physical Address of the Distributor region - * gicr_base_gpa - Guest Physical Address of the Redistributor region * * Output args: None * @@ -30,25 +41,11 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, - uint64_t gicd_base_gpa, uint64_t gicr_base_gpa) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; - uint64_t redist_attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. - */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n", - nr_vcpus, nr_vcpus_created); + uint64_t attr; + unsigned int nr_gic_pages; /* Distributor setup */ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); @@ -57,26 +54,53 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - + attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa); + KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); - virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages); + virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages); /* Redistributor setup */ - redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0); + attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0); kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr); + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); - virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages); + virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + return gic_fd; +} + +void __vgic_v3_init(int fd) +{ + kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); +} - return gic_fd; +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +{ + unsigned int nr_vcpus_created = 0; + struct list_head *iter; + int fd; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. + */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); + if (fd < 0) + return fd; + + __vgic_v3_init(fd); + return fd; } /* should only work for level sensitive interrupts */ @@ -168,3 +192,21 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } + +int vgic_its_setup(struct kvm_vm *vm) +{ + int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS); + u64 attr; + + attr = GITS_BASE_GPA; + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &attr); + + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA, + vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE)); + + return its_fd; +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 2bd25b191d15..b49690658c60 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Google LLC. */ - -#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/ - #include "test_util.h" #include <execinfo.h> diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 266f3876e10a..f34d926d9735 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -184,7 +184,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "Seek to program segment offset failed,\n" " program header idx: %u errno: %i\n" " offset_rv: 0x%jx\n" - " expected: 0x%jx\n", + " expected: 0x%jx", n1, errno, (intmax_t) offset_rv, (intmax_t) phdr.p_offset); test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 1df3ce4b16fd..b04901e55138 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -14,37 +14,33 @@ struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { #ifndef __aarch64__ - guest_mode_append(VM_MODE_DEFAULT, true, true); + guest_mode_append(VM_MODE_DEFAULT, true); #else { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - bool ps4k, ps16k, ps64k; + uint32_t ipa4k, ipa16k, ipa64k; int i; - aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k); + aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); - vm_mode_default = NUM_VM_MODES; + guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52); + guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52); + guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52); - if (limit >= 52) - guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k); - if (limit >= 48) { - guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k); - } - if (limit >= 40) { - guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k); - if (ps4k) - vm_mode_default = VM_MODE_P40V48_4K; - } - if (limit >= 36) { - guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k); - guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k); - } + guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48); + guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48); + guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48); + + guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40); + guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40); + guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40); + + guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36); + guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36); + guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36); + guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36); + + vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES; /* * Pick the first supported IPA size if the default @@ -72,7 +68,7 @@ void guest_modes_append_default(void) close(kvm_fd); /* Starting with z13 we have 47bits of physical address */ if (info.ibc >= 0x30) - guest_mode_append(VM_MODE_P47V64_4K, true, true); + guest_mode_append(VM_MODE_P47V64_4K, true); } #endif #ifdef __riscv @@ -80,9 +76,9 @@ void guest_modes_append_default(void) unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); if (sz >= 52) - guest_mode_append(VM_MODE_P52V48_4K, true, true); + guest_mode_append(VM_MODE_P52V48_4K, true); if (sz >= 48) - guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_4K, true); } #endif } diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c new file mode 100644 index 000000000000..74627514c4d4 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "ucall_common.h" + +#define APPEND_BUFFER_SAFE(str, end, v) \ +do { \ + GUEST_ASSERT(str < end); \ + *str++ = (v); \ +} while (0) + +static int isdigit(int ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +static int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i * 10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SMALL 32 /* Must be 32 == 0x20 */ +#define SPECIAL 64 /* 0x */ + +#define __do_div(n, base) \ +({ \ + int __res; \ + \ + __res = ((uint64_t) n) % (uint32_t) base; \ + n = ((uint64_t) n) / (uint32_t) base; \ + __res; \ +}) + +static char *number(char *str, const char *end, long num, int base, int size, + int precision, int type) +{ + /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ + static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ + + char tmp[66]; + char c, sign, locase; + int i; + + /* + * locase = 0 or 0x20. ORing digits or letters with 'locase' + * produces same digits or (maybe lowercased) letters + */ + locase = (type & SMALL); + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 16) + return NULL; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++] = '0'; + else + while (num != 0) + tmp[i++] = (digits[__do_div(num, base)] | locase); + if (i > precision) + precision = i; + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + if (sign) + APPEND_BUFFER_SAFE(str, end, sign); + if (type & SPECIAL) { + if (base == 8) + APPEND_BUFFER_SAFE(str, end, '0'); + else if (base == 16) { + APPEND_BUFFER_SAFE(str, end, '0'); + APPEND_BUFFER_SAFE(str, end, 'x'); + } + } + if (!(type & LEFT)) + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, c); + while (i < precision--) + APPEND_BUFFER_SAFE(str, end, '0'); + while (i-- > 0) + APPEND_BUFFER_SAFE(str, end, tmp[i]); + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + + return str; +} + +int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) +{ + char *str, *end; + const char *s; + uint64_t num; + int i, base; + int len; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* + * min. # of digits for integers; max + * number of chars for from string + */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + + end = buf + n; + GUEST_ASSERT(buf < end); + GUEST_ASSERT(n > 0); + + for (str = buf; *fmt; ++fmt) { + if (*fmt != '%') { + APPEND_BUFFER_SAFE(str, end, *fmt); + continue; + } + + /* process flags */ + flags = 0; +repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + qualifier = *fmt; + ++fmt; + } + + /* + * Play nice with %llu, %llx, etc. KVM selftests only support + * 64-bit builds, so just treat %ll* the same as %l*. + */ + if (qualifier == 'l' && *fmt == 'l') + ++fmt; + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + APPEND_BUFFER_SAFE(str, end, + (uint8_t)va_arg(args, int)); + while (--field_width > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + continue; + + case 's': + s = va_arg(args, char *); + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + APPEND_BUFFER_SAFE(str, end, ' '); + for (i = 0; i < len; ++i) + APPEND_BUFFER_SAFE(str, end, *s++); + while (len < field_width--) + APPEND_BUFFER_SAFE(str, end, ' '); + continue; + + case 'p': + if (field_width == -1) { + field_width = 2 * sizeof(void *); + flags |= SPECIAL | SMALL | ZEROPAD; + } + str = number(str, end, + (uint64_t)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + APPEND_BUFFER_SAFE(str, end, '%'); + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'x': + flags |= SMALL; + case 'X': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + APPEND_BUFFER_SAFE(str, end, '%'); + if (*fmt) + APPEND_BUFFER_SAFE(str, end, *fmt); + else + --fmt; + continue; + } + if (qualifier == 'l') + num = va_arg(args, uint64_t); + else if (qualifier == 'h') { + num = (uint16_t)va_arg(args, int); + if (flags & SIGN) + num = (int16_t)num; + } else if (flags & SIGN) + num = va_arg(args, int); + else + num = va_arg(args, uint32_t); + str = number(str, end, num, base, field_width, precision, flags); + } + + GUEST_ASSERT(str < end); + *str = '\0'; + return str - buf; +} + +int guest_snprintf(char *buf, int n, const char *fmt, ...) +{ + va_list va; + int len; + + va_start(va, fmt); + len = guest_vsnprintf(buf, n, fmt, va); + va_end(va); + + return len; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9741a7ff6380..8279b6ced8d2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -4,15 +4,15 @@ * * Copyright (C) 2018, Google LLC. */ - -#define _GNU_SOURCE /* for program_invocation_name */ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #include <assert.h> #include <sched.h> #include <sys/mman.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -20,16 +20,33 @@ #define KVM_UTIL_MIN_PFN 2 -static int vcpu_mmap_sz(void); +uint32_t guest_random_seed; +struct guest_random_state guest_rng; +static uint32_t last_guest_seed; -int open_path_or_exit(const char *path, int flags) +static size_t vcpu_mmap_sz(void); + +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" : enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -43,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -51,13 +68,16 @@ int open_kvm_dev_path_or_exit(void) return _open_kvm_dev_path_or_exit(O_RDONLY); } -static bool get_module_param_bool(const char *module_name, const char *param) +static ssize_t get_module_param(const char *module_name, const char *param, + void *buffer, size_t buffer_size) { const int path_size = 128; char path[path_size]; - char value; - ssize_t r; - int fd; + ssize_t bytes_read; + int fd, r; + + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); @@ -66,33 +86,53 @@ static bool get_module_param_bool(const char *module_name, const char *param) fd = open_path_or_exit(path, O_RDONLY); - r = read(fd, &value, 1); - TEST_ASSERT(r == 1, "read(%s) failed", path); + bytes_read = read(fd, buffer, buffer_size); + TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes", + path, bytes_read, buffer_size); r = close(fd); TEST_ASSERT(!r, "close(%s) failed", path); - - if (value == 'Y') - return true; - else if (value == 'N') - return false; - - TEST_FAIL("Unrecognized value '%c' for boolean module param", value); + return bytes_read; } -bool get_kvm_param_bool(const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { - return get_module_param_bool("kvm", param); -} + /* + * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the + * NUL char, and 1 byte because the kernel sucks and inserts a newline + * at the end. + */ + char value[16 + 1 + 1]; + ssize_t r; -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); + memset(value, '\0', sizeof(value)); + + r = get_module_param(module_name, param, value, sizeof(value)); + TEST_ASSERT(value[r - 1] == '\n', + "Expected trailing newline, got char '%c'", value[r - 1]); + + /* + * Squash the newline, otherwise atoi_paranoid() will complain about + * trailing non-NUL characters in the string. + */ + value[r - 1] = '\0'; + return atoi_paranoid(value); } -bool get_kvm_amd_param_bool(const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { - return get_module_param_bool("kvm_amd", param); + char value; + ssize_t r; + + r = get_module_param(module_name, param, &value, sizeof(value)); + TEST_ASSERT_EQ(r, 1); + + if (value == 'Y') + return true; + else if (value == 'N') + return false; + + TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } /* @@ -142,12 +182,18 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } const char *vm_guest_mode_string(uint32_t i) { static const char * const strings[] = { [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", + [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages", [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", @@ -155,12 +201,13 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", - [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages", [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, @@ -173,6 +220,7 @@ const char *vm_guest_mode_string(uint32_t i) const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, + [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 }, [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, @@ -180,12 +228,13 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, - [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 }, [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, @@ -209,7 +258,7 @@ __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) (1ULL << (vm->va_bits - 1)) >> vm->page_shift); } -struct kvm_vm *____vm_create(enum vm_guest_mode mode) +struct kvm_vm *____vm_create(struct vm_shape shape) { struct kvm_vm *vm; @@ -221,13 +270,13 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) vm->regions.hva_tree = RB_ROOT; hash_init(vm->regions.slot_hash); - vm->mode = mode; - vm->type = 0; + vm->mode = shape.mode; + vm->type = shape.type; - vm->pa_bits = vm_guest_mode_params[mode].pa_bits; - vm->va_bits = vm_guest_mode_params[mode].va_bits; - vm->page_size = vm_guest_mode_params[mode].page_size; - vm->page_shift = vm_guest_mode_params[mode].page_shift; + vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; + vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; + vm->page_size = vm_guest_mode_params[vm->mode].page_size; + vm->page_shift = vm_guest_mode_params[vm->mode].page_shift; /* Setup mode specific traits. */ switch (vm->mode) { @@ -251,31 +300,36 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) case VM_MODE_P36V48_64K: vm->pgtable_levels = 3; break; + case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: vm->pgtable_levels = 4; break; + case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: #ifdef __x86_64__ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); - /* - * Ignore KVM support for 5-level paging (vm->va_bits == 57), - * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this VM_MODE. - */ - TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, - "Linear address width (%d bits) not supported", - vm->va_bits); + kvm_init_vm_address_properties(vm); + pr_debug("Guest physical address width detected: %d\n", vm->pa_bits); - vm->pgtable_levels = 4; - vm->va_bits = 48; + pr_debug("Guest virtual address width detected: %d\n", + vm->va_bits); + + if (vm->va_bits == 57) { + vm->pgtable_levels = 5; + } else { + TEST_ASSERT(vm->va_bits == 48, + "Unexpected guest virtual address width: %d", + vm->va_bits); + vm->pgtable_levels = 4; + } #else - TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); + TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: @@ -285,10 +339,11 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) vm->pgtable_levels = 5; break; default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); + TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); } #ifdef __aarch64__ + TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types"); if (vm->pa_bits != 40) vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); #endif @@ -312,10 +367,11 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t extra_mem_pages) { + uint64_t page_size = vm_guest_mode_params[mode].page_size; uint64_t nr_pages; TEST_ASSERT(nr_runnable_vcpus, - "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); + "Use vm_create_barebones() for VMs that _never_ have vCPUs"); TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), "nr_vcpus = %d too large for host, max-vcpus = %d", @@ -340,24 +396,78 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, */ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; + /* Account for the number of pages needed by ucall. */ + nr_pages += ucall_nr_pages_required(page_size); + return vm_adjust_num_guest_pages(mode, nr_pages); } -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +void kvm_set_files_rlimit(uint32_t nr_vcpus) +{ + /* + * Each vCPU will open two file descriptors: the vCPU itself and the + * vCPU's binary stats file descriptor. Add an arbitrary amount of + * buffer for all other files a test may open. + */ + int nr_fds_wanted = nr_vcpus * 2 + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + +static bool is_guest_memfd_required(struct vm_shape shape) +{ +#ifdef __x86_64__ + return shape.type == KVM_X86_SNP_VM; +#else + return false; +#endif +} + +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, + uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; - int i; + int i, flags; - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); + kvm_set_files_rlimit(nr_runnable_vcpus); - vm = ____vm_create(mode); + pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, + vm_guest_mode_string(shape.mode), shape.type, nr_pages); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + vm = ____vm_create(shape); + + /* + * Force GUEST_MEMFD for the primary memory region if necessary, e.g. + * for CoCo VMs that require GUEST_MEMFD backed private memory. + */ + flags = 0; + if (is_guest_memfd_required(shape)) + flags |= KVM_MEM_GUEST_MEMFD; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags); for (i = 0; i < NR_MEM_REGIONS; i++) vm->memslots[i] = 0; @@ -372,7 +482,14 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - kvm_arch_vm_post_create(vm); + if (guest_random_seed != last_guest_seed) { + pr_info("Random seed: 0x%x\n", guest_random_seed); + last_guest_seed = guest_random_seed; + } + guest_rng = new_guest_random_state(guest_random_seed); + sync_global_to_guest(vm, guest_rng); + + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); return vm; } @@ -396,7 +513,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. */ -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -405,23 +522,24 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); - vm = __vm_create(mode, nr_vcpus, extra_mem_pages); + vm = __vm_create(shape, nr_vcpus, extra_mem_pages); for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } -struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code) +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) { struct kvm_vcpu *vcpus[1]; struct kvm_vm *vm; - vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, - guest_code, vcpus); + vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus); *vcpu = vcpus[0]; return vm; @@ -449,8 +567,9 @@ void kvm_vm_restart(struct kvm_vm *vmp) vm_create_irqchip(vmp); hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { - int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" " guest_phys_addr: 0x%llx size: 0x%llx", @@ -474,15 +593,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -490,7 +608,7 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), - "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + "Not allowed to run on pCPU '%d', check cgroups?", pcpu); return pcpu; } @@ -520,7 +638,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int i, r; cpu_list = strdup(pcpus_string); - TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + TEST_ASSERT(cpu_list, "strdup() allocation failed."); r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_getaffinity() failed"); @@ -529,14 +647,14 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 1. Get all pcpus for vcpus. */ for (i = 0; i < nr_vcpus; i++) { - TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i); vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); cpu = strtok(NULL, delim); } /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -586,33 +704,18 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } -/* - * KVM Userspace Memory Region Find - * - * Input Args: - * vm - Virtual Machine - * start - Starting VM physical address - * end - Ending VM physical address, inclusive. - * - * Output Args: None - * - * Return: - * Pointer to overlapping region, NULL if no such region. - * - * Public interface to userspace_mem_region_find. Allows tests to look up - * the memslot datastructure for a given range of guest physical memory. - */ -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end) +static void kvm_stats_release(struct kvm_binary_stats *stats) { - struct userspace_mem_region *region; + if (stats->fd < 0) + return; - region = userspace_mem_region_find(vm, start, end); - if (!region) - return NULL; + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } - return ®ion->region; + kvm_close(stats->fd); + stats->fd = -1; } __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) @@ -634,19 +737,15 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) */ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - int ret; - if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); - ret = close(vcpu->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vcpu->fd); + kvm_stats_release(&vcpu->stats); list_del(&vcpu->list); @@ -657,42 +756,36 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) void kvm_vm_release(struct kvm_vm *vmp) { struct kvm_vcpu *vcpu, *tmp; - int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) vm_vcpu_rm(vmp, vcpu); - ret = close(vmp->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vmp->fd); + kvm_close(vmp->kvm_fd); + + /* Free cached stats metadata and close FD */ + kvm_stats_release(&vmp->stats); - ret = close(vmp->kvm_fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_arch_vm_release(vmp); } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region, - bool unlink) + struct userspace_mem_region *region) { - int ret; - - if (unlink) { - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); - rb_erase(®ion->hva_node, &vm->regions.hva_tree); - hash_del(®ion->slot_node); - } - - region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); + rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); + rb_erase(®ion->hva_node, &vm->regions.hva_tree); + hash_del(®ion->slot_node); sparsebit_free(®ion->unused_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + sparsebit_free(®ion->protected_phy_pages); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } + if (region->region.guest_memfd >= 0) + close(region->region.guest_memfd); free(region); } @@ -709,15 +802,9 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) - __vm_mem_region_delete(vmp, region, false); + __vm_mem_region_delete(vmp, region); /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); @@ -732,7 +819,7 @@ void kvm_vm_free(struct kvm_vm *vmp) int kvm_memfd_alloc(size_t size, bool hugepages) { int memfd_flags = MFD_CLOEXEC; - int fd, r; + int fd; if (hugepages) memfd_flags |= MFD_HUGETLB; @@ -740,85 +827,12 @@ int kvm_memfd_alloc(size_t size, bool hugepages) fd = memfd_create("kvm_selftest", memfd_flags); TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); - r = ftruncate(fd, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); - - r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_ftruncate(fd, size); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); return fd; } -/* - * Memory Compare, host virtual to guest virtual - * - * Input Args: - * hva - Starting host virtual address - * vm - Virtual Machine - * gva - Starting guest virtual address - * len - number of bytes to compare - * - * Output Args: None - * - * Input/Output Args: None - * - * Return: - * Returns 0 if the bytes starting at hva for a length of len - * are equal the guest virtual bytes starting at gva. Returns - * a value < 0, if bytes at hva are less than those at gva. - * Otherwise a value > 0 is returned. - * - * Compares the bytes starting at the host virtual address hva, for - * a length of len, to the guest bytes starting at the guest virtual - * address given by gva. - */ -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) -{ - size_t amt; - - /* - * Compare a batch of bytes until either a match is found - * or all the bytes have been compared. - */ - for (uintptr_t offset = 0; offset < len; offset += amt) { - uintptr_t ptr1 = (uintptr_t)hva + offset; - - /* - * Determine host address for guest virtual address - * at offset. - */ - uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - - /* - * Determine amount to compare on this pass. - * Don't allow the comparsion to cross a page boundary. - */ - amt = len - offset; - if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr1 % vm->page_size); - if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr2 % vm->page_size); - - assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); - assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - - /* - * Perform the comparison. If there is a difference - * return that result to the caller, otherwise need - * to continue on looking for a mismatch. - */ - int ret = memcmp((void *)ptr1, (void *)ptr2, amt); - if (ret != 0) - return ret; - } - - /* - * No mismatch found. Let the caller know the two memory - * areas are equal. - */ - return 0; -} - static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, struct userspace_mem_region *region) { @@ -894,66 +908,81 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } -/* - * VM Userspace Memory Region Add - * - * Input Args: - * vm - Virtual Machine - * src_type - Storage source for this region. - * NULL to use anonymous memory. - * guest_paddr - Starting guest physical address - * slot - KVM region slot - * npages - Number of physical pages - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) - * - * Output Args: None - * - * Return: None - * - * Allocates a memory area of the number of pages specified by npages - * and maps it to the VM specified by vm, at a starting physical address - * given by guest_paddr. The region is created with a KVM region slot - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The - * region is created with the flags given by flags. - */ -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags) +#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \ + __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ + "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") + +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset) +{ + struct kvm_userspace_memory_region2 region = { + .slot = slot, + .flags = flags, + .guest_phys_addr = gpa, + .memory_size = size, + .userspace_addr = (uintptr_t)hva, + .guest_memfd = guest_memfd, + .guest_memfd_offset = guest_memfd_offset, + }; + + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); + + return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); +} + +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset) +{ + int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, + guest_memfd, guest_memfd_offset); + + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)", + errno, strerror(errno)); +} + + +/* FIXME: This thing needs to be ripped apart and rewritten. */ +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags, + int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); + size_t mem_size = npages * vm->page_size; size_t alignment; + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); + TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, "Number of guest pages is not compatible with the host. " "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); - TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" - " guest_paddr: 0x%lx vm->page_size: 0x%x", - guest_paddr, vm->page_size); - TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + " gpa: 0x%lx vm->page_size: 0x%x", + gpa, vm->page_size); + TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1) <= vm->max_gfn, "Physical range beyond maximum " "supported physical address,\n" - " guest_paddr: 0x%lx npages: 0x%lx\n" + " gpa: 0x%lx npages: 0x%lx\n" " vm->max_gfn: 0x%lx vm->page_size: 0x%x", - guest_paddr, npages, vm->max_gfn, vm->page_size); + gpa, npages, vm->max_gfn, vm->page_size); /* * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); + vm, gpa, (gpa + npages * vm->page_size) - 1); if (region != NULL) TEST_FAIL("overlapping userspace_mem_region already " "exists\n" - " requested guest_paddr: 0x%lx npages: 0x%lx " - "page_size: 0x%x\n" - " existing guest_paddr: 0x%lx size: 0x%lx", - guest_paddr, npages, vm->page_size, + " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n" + " existing gpa: 0x%lx size: 0x%lx", + gpa, npages, vm->page_size, (uint64_t) region->region.guest_phys_addr, (uint64_t) region->region.memory_size); @@ -967,8 +996,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "already exists.\n" " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", - slot, guest_paddr, npages, - region->region.slot, + slot, gpa, npages, region->region.slot, (uint64_t) region->region.guest_phys_addr, (uint64_t) region->region.memory_size); } @@ -976,7 +1004,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* Allocate and initialize new mem region structure. */ region = calloc(1, sizeof(*region)); TEST_ASSERT(region != NULL, "Insufficient Memory"); - region->mmap_size = npages * vm->page_size; + region->mmap_size = mem_size; #ifdef __s390x__ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ @@ -994,7 +1022,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -1005,12 +1033,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1023,29 +1048,53 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { - ret = madvise(region->host_mem, npages * vm->page_size, + ret = madvise(region->host_mem, mem_size, src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", - region->host_mem, npages * vm->page_size, + region->host_mem, mem_size, vm_mem_backing_src_alias(src_type)->name); } region->backing_src_type = src_type; + + if (flags & KVM_MEM_GUEST_MEMFD) { + if (guest_memfd < 0) { + uint32_t guest_memfd_flags = 0; + TEST_ASSERT(!guest_memfd_offset, + "Offset must be zero when creating new guest_memfd"); + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); + } else { + /* + * Install a unique fd for each memslot so that the fd + * can be closed when the region is deleted without + * needing to track if the fd is owned by the framework + * or by the caller. + */ + guest_memfd = kvm_dup(guest_memfd); + } + + region->region.guest_memfd = guest_memfd; + region->region.guest_memfd_offset = guest_memfd_offset; + } else { + region->region.guest_memfd = -1; + } + region->unused_phy_pages = sparsebit_alloc(); - sparsebit_set_num(region->unused_phy_pages, - guest_paddr >> vm->page_shift, npages); + if (vm_arch_has_protected_memory(vm)) + region->protected_phy_pages = sparsebit_alloc(); + sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages); region->region.slot = slot; region->region.flags = flags; - region->region.guest_phys_addr = guest_paddr; + region->region.guest_phys_addr = gpa; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx", - ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size); + " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d", + ret, errno, slot, flags, gpa, region->region.memory_size, + region->region.guest_memfd); /* Add to quick lookup data structures */ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); @@ -1054,18 +1103,24 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* If shared memory, create an alias. */ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); } } +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t gpa, uint32_t slot, uint64_t npages, + uint32_t flags) +{ + vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); +} + /* * Memslot to region * @@ -1122,13 +1177,23 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) region->region.flags = flags; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i slot: %u flags: 0x%x", ret, errno, slot, flags); } +void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot) +{ + struct userspace_mem_region *region = memslot2region(vm, slot); + struct kvm_userspace_memory_region2 tmp = region->region; + + tmp.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); +} + /* * VM Memory Region Move * @@ -1152,9 +1217,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) region->region.guest_phys_addr = new_gpa; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n" "ret: %i errno: %i slot: %u new_gpa: 0x%lx", ret, errno, slot, new_gpa); } @@ -1174,18 +1239,51 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot), true); + struct userspace_mem_region *region = memslot2region(vm, slot); + + region->region.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + __vm_mem_region_delete(vm, region); +} + +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, + bool punch_hole) +{ + const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); + struct userspace_mem_region *region; + uint64_t end = base + size; + uint64_t gpa, len; + off_t fd_offset; + int ret; + + for (gpa = base; gpa < end; gpa += len) { + uint64_t offset; + + region = userspace_mem_region_find(vm, gpa, gpa); + TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, + "Private memory region not found for GPA 0x%lx", gpa); + + offset = gpa - region->region.guest_phys_addr; + fd_offset = region->region.guest_memfd_offset + offset; + len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); + + ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); + TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", + punch_hole ? "punch hole" : "allocate", gpa, len, + region->region.guest_memfd, mode, fd_offset); + } } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1214,7 +1312,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_vcpu *vcpu; /* Confirm a vcpu with the specified id doesn't already exist. */ - TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); + TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id); /* Allocate and initialize new vcpu structure. */ vcpu = calloc(1, sizeof(*vcpu)); @@ -1223,15 +1321,18 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu->vm = vm; vcpu->id = vcpu_id; vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); - TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); + TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vcpu->stats.fd = vcpu_get_stats_fd(vcpu); + else + vcpu->stats.fd = -1; /* Add to linked-list of VCPUs. */ list_add(&vcpu->list, &vm->vcpus); @@ -1325,15 +1426,17 @@ va_found: return pgidx_start * vm->page_size; } -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type, + bool protected) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); - vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, - vm->memslots[type]); + vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages, + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type], protected); /* * Find an unused range of virtual page addresses of at least @@ -1346,13 +1449,25 @@ vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, pages--, vaddr += vm->page_size, paddr += vm->page_size) { virt_pg_map(vm, vaddr, paddr); - - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; } +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type) +{ + return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, + vm_arch_has_protected_memory(vm)); +} + +vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type) +{ + return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false); +} + /* * VM Virtual Address Allocate * @@ -1447,7 +1562,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, while (npages--) { virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); vaddr += page_size; paddr += page_size; @@ -1475,6 +1589,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) { struct userspace_mem_region *region; + gpa = vm_untag_gpa(vm, gpa); + region = userspace_mem_region_find(vm, gpa, gpa); if (!region) { TEST_FAIL("No vm physical memory at 0x%lx", gpa); @@ -1564,7 +1680,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. */ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } @@ -1577,7 +1704,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu) rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vcpu); + if (!rc) + assert_on_unhandled_exception(vcpu); return rc; } @@ -1643,9 +1771,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); @@ -1821,6 +1948,10 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) region->host_mem); fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); sparsebit_dump(stream, region->unused_phy_pages, 0); + if (region->protected_phy_pages) { + fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, ""); + sparsebit_dump(stream, region->protected_phy_pages, 0); + } } fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); sparsebit_dump(stream, vm->vpages_mapped, indent + 2); @@ -1882,9 +2013,9 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), + KVM_EXIT_STRING(ARM_SEA), }; /* @@ -1922,6 +2053,7 @@ const char *exit_reason_str(unsigned int exit_reason) * num - number of pages * paddr_min - Physical address minimum * memslot - Memory region to allocate page from + * protected - True if the pages will be used as protected/private memory * * Output Args: None * @@ -1933,8 +2065,9 @@ const char *exit_reason_str(unsigned int exit_reason) * and their base address is returned. A TEST_ASSERT failure occurs if * not enough pages are available at or above paddr_min. */ -vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot) +vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot, + bool protected) { struct userspace_mem_region *region; sparsebit_idx_t pg, base; @@ -1947,8 +2080,10 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, paddr_min, vm->page_size); region = memslot2region(vm, memslot); - base = pg = paddr_min >> vm->page_shift; + TEST_ASSERT(!protected || region->protected_phy_pages, + "Region doesn't support protected memory"); + base = pg = paddr_min >> vm->page_shift; do { for (; pg < base + num; ++pg) { if (!sparsebit_is_set(region->unused_phy_pages, pg)) { @@ -1967,8 +2102,11 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, abort(); } - for (pg = base; pg < base + num; ++pg) + for (pg = base; pg < base + num; ++pg) { sparsebit_clear(region->unused_phy_pages, pg); + if (protected) + sparsebit_set(region->protected_phy_pages, pg); + } return base * vm->page_size; } @@ -2115,49 +2253,42 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, desc->name, size, ret); } -/* - * Read the data of the named stat - * - * Input Args: - * vm - the VM for which the stat should be read - * stat_name - the name of the stat to read - * max_elements - the maximum number of 8-byte values to read into data - * - * Output Args: - * data - the buffer into which stat data should be read - * - * Read the data values of a specified stat from the binary stats interface. - */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements) +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_fd) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); - - break; + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); + return; } + + TEST_FAIL("Unable to find stat '%s'", name); +} + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ +} + +__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ } -__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +__weak void kvm_arch_vm_release(struct kvm_vm *vm) { } @@ -2165,10 +2296,57 @@ __weak void kvm_selftest_arch_init(void) { } +static void report_unexpected_signal(int signum) +{ +#define KVM_CASE_SIGNUM(sig) \ + case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum) + + switch (signum) { + KVM_CASE_SIGNUM(SIGBUS); + KVM_CASE_SIGNUM(SIGSEGV); + KVM_CASE_SIGNUM(SIGILL); + KVM_CASE_SIGNUM(SIGFPE); + default: + TEST_FAIL("Unexpected signal %d\n", signum); + } +} + void __attribute((constructor)) kvm_selftest_init(void) { + struct sigaction sig_sa = { + .sa_handler = report_unexpected_signal, + }; + /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); + sigaction(SIGBUS, &sig_sa, NULL); + sigaction(SIGSEGV, &sig_sa, NULL); + sigaction(SIGILL, &sig_sa, NULL); + sigaction(SIGFPE, &sig_sa, NULL); + + guest_random_seed = last_guest_seed = random(); + pr_info("Random seed: 0x%x\n", guest_random_seed); + kvm_selftest_arch_init(); } + +bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) +{ + sparsebit_idx_t pg = 0; + struct userspace_mem_region *region; + + if (!vm_arch_has_protected_memory(vm)) + return false; + + region = userspace_mem_region_find(vm, paddr, paddr); + TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr); + + pg = paddr >> vm->page_shift; + return sparsebit_is_set(region->protected_phy_pages, pg); +} + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S new file mode 100644 index 000000000000..3f1e4b67c5ae --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "processor.h" + +/* address of refill exception should be 4K aligned */ +.balign 4096 +.global handle_tlb_refill +handle_tlb_refill: + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 + lddir t0, t0, 1 + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn + + /* + * save and restore all gprs except base register, + * and default value of base register is sp ($r3). + */ +.macro save_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\n, \base, 8 * \n + .endr +.endm + +.macro restore_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\n, \base, 8 * \n + .endr +.endm + +/* address of general exception should be 4K aligned */ +.balign 4096 +.global handle_exception +handle_exception: + csrwr sp, LOONGARCH_CSR_KS0 + csrrd sp, LOONGARCH_CSR_KS1 + addi.d sp, sp, -EXREGS_SIZE + + save_gprs sp + /* save sp register to stack */ + csrrd t0, LOONGARCH_CSR_KS0 + st.d t0, sp, 3 * 8 + + csrrd t0, LOONGARCH_CSR_ERA + st.d t0, sp, PC_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_ESTAT + st.d t0, sp, ESTAT_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_BADV + st.d t0, sp, BADV_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_PRMD + st.d t0, sp, PRMD_OFFSET_EXREGS + + or a0, sp, zero + bl route_exception + ld.d t0, sp, PC_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_ERA + ld.d t0, sp, PRMD_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_PRMD + restore_gprs sp + csrrd sp, LOONGARCH_CSR_KS0 + ertn diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c new file mode 100644 index 000000000000..07c103369ddb --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <linux/compiler.h> + +#include <asm/kvm.h> +#include "kvm_util.h" +#include "processor.h" +#include "ucall_common.h" + +#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 +#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 + +static vm_paddr_t invalid_pgtable[4]; +static vm_vaddr_t exception_handlers; + +static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +{ + unsigned int shift; + uint64_t mask; + + shift = level * (vm->page_shift - 3) + vm->page_shift; + mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> shift) & mask; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + return entry & ~((0x1UL << vm->page_shift) - 1); +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child) +{ + uint64_t *ptep; + int i, ptrs_per_pte; + + ptep = addr_gpa2hva(vm, table); + ptrs_per_pte = 1 << (vm->page_shift - 3); + for (i = 0; i < ptrs_per_pte; i++) + WRITE_ONCE(*(ptep + i), child); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + int i; + vm_paddr_t child, table; + + if (vm->pgd_created) + return; + + child = table = 0; + for (i = 0; i < vm->pgtable_levels; i++) { + invalid_pgtable[i] = child; + table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, + vm->memslots[MEM_REGION_PT]); + TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i); + virt_set_pgtable(vm, table, child); + child = table; + } + vm->pgd = table; + vm->pgd_created = true; +} + +static int virt_pte_none(uint64_t *ptep, int level) +{ + return *ptep == invalid_pgtable[level]; +} + +static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc) +{ + int level; + uint64_t *ptep; + vm_paddr_t child; + + if (!vm->pgd_created) + goto unmapped_gva; + + child = vm->pgd; + level = vm->pgtable_levels - 1; + while (level > 0) { + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + if (virt_pte_none(ptep, level)) { + if (alloc) { + child = vm_alloc_page_table(vm); + virt_set_pgtable(vm, child, invalid_pgtable[level - 1]); + WRITE_ONCE(*ptep, child); + } else + goto unmapped_gva; + + } else + child = pte_addr(vm, *ptep); + level--; + } + + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + ptep = virt_populate_pte(vm, gva, 0); + TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint32_t prot_bits; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = virt_populate_pte(vm, vaddr, 1); + prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; + WRITE_ONCE(*ptep, paddr | prot_bits); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ + uint64_t pte, *ptep; + static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (virt_pte_none(ptep, level)) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", + indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--); + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level; + + if (!vm->pgd_created) + return; + + level = vm->pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->pgd, level); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)", + uc.args[0], uc.args[1], uc.args[2]); +} + +void route_exception(struct ex_regs *regs) +{ + int vector; + unsigned long pc, estat, badv; + struct handlers *handlers; + + handlers = (struct handlers *)exception_handlers; + vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; + if (handlers && handlers->exception_handlers[vector]) + return handlers->exception_handlers[vector](regs); + + pc = regs->pc; + badv = regs->badv; + estat = regs->estat; + ucall(UCALL_UNHANDLED, 3, pc, estat, badv); + while (1) ; +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + void *addr; + + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + + addr = addr_gva2hva(vm, vm->handlers); + memset(addr, 0, vm->page_size); + exception_handlers = vm->handlers; + sync_global_to_guest(vm, exception_handlers); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector] = handler; +} + +uint32_t guest_get_vcpuid(void) +{ + return csr_read(LOONGARCH_CSR_CPUID); +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + int i; + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + "num: %u\n", num); + + vcpu_regs_get(vcpu, ®s); + + va_start(ap, num); + for (i = 0; i < num; i++) + regs.gpr[i + 4] = va_arg(ap, uint64_t); + va_end(ap); + + vcpu_regs_set(vcpu, ®s); +} + +static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + __vcpu_set_reg(vcpu, id, val); +} + +static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_get_reg(vcpu, csrid, addr); +} + +static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, csrid, val); +} + +static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int width; + unsigned long val; + struct kvm_vm *vm = vcpu->vm; + + switch (vm->mode) { + case VM_MODE_P36V47_16K: + case VM_MODE_P47V47_16K: + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + /* kernel mode and page enable mode */ + val = PLV_KERN | CSR_CRMD_PG; + loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1); + + /* time count start from 0 */ + val = 0; + loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val); + + width = vm->page_shift - 3; + + switch (vm->pgtable_levels) { + case 4: + /* pud page shift and width */ + val = (vm->page_shift + width * 2) << 20 | (width << 25); + /* fall throuth */ + case 3: + /* pmd page shift and width */ + val |= (vm->page_shift + width) << 10 | (width << 15); + /* pte page shift and width */ + val |= vm->page_shift | width << 5; + break; + default: + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels); + } + + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); + + /* PGD page shift and width */ + val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6; + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd); + + /* + * Refill exception runs on real mode + * Entry address should be physical address + */ + val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val); + + /* + * General exception runs on page-enabled mode + * Entry address should be virtual address + */ + val = (unsigned long)handle_exception; + loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val); + val &= ~CSR_TLBIDX_SIZEM; + val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); + + /* LOONGARCH_CSR_KS1 is used for exception stack */ + val = __vm_vaddr_alloc(vm, vm->page_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(val != 0, "No memory for exception stack"); + val = val + vm->page_size; + loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val); + val &= ~CSR_TLBREHI_PS; + val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + size_t stack_size; + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_vcpu *vcpu; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack"); + + loongarch_vcpu_setup(vcpu); + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, ®s); + regs.gpr[3] = stack_vaddr + stack_size; + vcpu_regs_set(vcpu, ®s); + + return vcpu; +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + /* Setup guest PC register */ + vcpu_regs_get(vcpu, ®s); + regs.pc = (uint64_t)guest_code; + vcpu_regs_set(vcpu, ®s); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c index f212bd8ab93d..fc6cbb50573f 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -2,7 +2,6 @@ /* * ucall support. A ucall is a "hypercall to userspace". * - * Copyright (C) 2018, Red Hat, Inc. */ #include "kvm_util.h" @@ -10,7 +9,7 @@ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each * VM), it must not be accessed from host code. */ -static vm_vaddr_t *ucall_exit_mmio_addr; +vm_vaddr_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { @@ -23,11 +22,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); } -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - WRITE_ONCE(*ucall_exit_mmio_addr, uc); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -36,6 +30,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), "Unexpected ucall exit mmio address access"); + return (void *)(*((uint64_t *)run->mmio.data)); } diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c new file mode 100644 index 000000000000..46a14fd63d9e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + */ + +#include <time.h> + +#include "lru_gen_util.h" + +/* + * Tracks state while we parse memcg lru_gen stats. The file we're parsing is + * structured like this (some extra whitespace elided): + * + * memcg (id) (path) + * node (id) + * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages) + */ +struct memcg_stats_parse_context { + bool consumed; /* Whether or not this line was consumed */ + /* Next parse handler to invoke */ + void (*next_handler)(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + int current_node_idx; /* Current index in nodes array */ + const char *name; /* The name of the memcg we're looking for */ +}; + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + +struct split_iterator { + char *str; + char *save; +}; + +static char *split_next(struct split_iterator *it) +{ + char *ret = strtok_r(it->str, " \t\n\r", &it->save); + + it->str = NULL; + return ret; +} + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *memcg_id = split_next(&it); + char *memcg_name = split_next(&it); + char *end; + + ctx->consumed = true; + + if (!prefix || strcmp("memcg", prefix)) + return; /* Not a memcg line (maybe empty), skip */ + + TEST_ASSERT(memcg_id && memcg_name, + "malformed memcg line; no memcg id or memcg_name"); + + if (strcmp(memcg_name + 1, ctx->name)) + return; /* Wrong memcg, skip */ + + /* Found it! */ + + stats->memcg_id = strtoul(memcg_id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id); + if (!stats->memcg_id) + return; /* Removed memcg? */ + + ctx->next_handler = memcg_stats_handle_in_memcg; +} + +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *id = split_next(&it); + long found_node_id; + char *end; + + ctx->consumed = true; + ctx->current_node_idx = -1; + + if (!prefix) + return; /* Skip empty lines */ + + if (!strcmp("memcg", prefix)) { + /* Memcg done, found next one; stop. */ + ctx->next_handler = NULL; + return; + } else if (strcmp("node", prefix)) + TEST_ASSERT(false, "found malformed line after 'memcg ...'," + "token: '%s'", prefix); + + /* At this point we know we have a node line. Parse the ID. */ + + TEST_ASSERT(id, "malformed node line; no node id"); + + found_node_id = strtol(id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed node id '%s'", id); + + ctx->current_node_idx = stats->nr_nodes++; + TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES, + "memcg has stats for too many nodes, max is %d", + MAX_NR_NODES); + stats->nodes[ctx->current_node_idx].node = found_node_id; + + ctx->next_handler = memcg_stats_handle_in_node; +} + +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + char *my_line = strdup(line); + struct split_iterator it = { .str = my_line }; + char *gen, *age, *nr_anon, *nr_file; + struct node_stats *node_stats; + struct generation_stats *gen_stats; + char *end; + + TEST_ASSERT(it.str, "failed to copy input line"); + + gen = split_next(&it); + + if (!gen) + goto out_consume; /* Skip empty lines */ + + if (!strcmp("memcg", gen) || !strcmp("node", gen)) { + /* + * Reached next memcg or node section. Don't consume, let the + * other handler deal with this. + */ + ctx->next_handler = memcg_stats_handle_in_memcg; + goto out; + } + + node_stats = &stats->nodes[ctx->current_node_idx]; + TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS, + "found too many generation lines; max is %d", + MAX_NR_GENS); + gen_stats = &node_stats->gens[node_stats->nr_gens++]; + + age = split_next(&it); + nr_anon = split_next(&it); + nr_file = split_next(&it); + + TEST_ASSERT(age && nr_anon && nr_file, + "malformed generation line; not enough tokens"); + + gen_stats->gen = (int)strtol(gen, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen); + + gen_stats->age_ms = strtol(age, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age); + + gen_stats->nr_anon = strtol(nr_anon, &end, 10); + TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'", + nr_anon); + + gen_stats->nr_file = strtol(nr_file, &end, 10); + TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file); + +out_consume: + ctx->consumed = true; +out: + free(my_line); +} + +static void print_memcg_stats(const struct memcg_stats *stats, const char *name) +{ + int node, gen; + + pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id); + for (node = 0; node < stats->nr_nodes; ++node) { + pr_debug("\tnode %d\n", stats->nodes[node].node); + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + const struct generation_stats *gstats = + &stats->nodes[node].gens[gen]; + + pr_debug("\t\tgen %d\tage_ms %ld" + "\tnr_anon %ld\tnr_file %ld\n", + gstats->gen, gstats->age_ms, gstats->nr_anon, + gstats->nr_file); + } + } +} + +/* Re-read lru_gen debugfs information for @memcg into @stats. */ +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg) +{ + FILE *f; + ssize_t read = 0; + char *line = NULL; + size_t bufsz; + struct memcg_stats_parse_context ctx = { + .next_handler = memcg_stats_handle_searching, + .name = memcg, + }; + + memset(stats, 0, sizeof(struct memcg_stats)); + + f = fopen(LRU_GEN_DEBUGFS, "r"); + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + + while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) { + ctx.consumed = false; + + do { + ctx.next_handler(stats, &ctx, line); + if (!ctx.next_handler) + break; + } while (!ctx.consumed); + } + + if (read < 0 && !feof(f)) + TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS); + + TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n" + "Did the memcg get created in the proper mount?", + memcg); + if (line) + free(line); + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); + + print_memcg_stats(stats, memcg); +} + +/* + * Find all pages tracked by lru_gen for this memcg in generation @target_gen. + * + * If @target_gen is negative, look for all generations. + */ +long lru_gen_sum_memcg_stats_for_gen(int target_gen, + const struct memcg_stats *stats) +{ + int node, gen; + long total_nr = 0; + + for (node = 0; node < stats->nr_nodes; ++node) { + const struct node_stats *node_stats = &stats->nodes[node]; + + for (gen = 0; gen < node_stats->nr_gens; ++gen) { + const struct generation_stats *gen_stats = + &node_stats->gens[gen]; + + if (target_gen >= 0 && gen_stats->gen != target_gen) + continue; + + total_nr += gen_stats->nr_anon + gen_stats->nr_file; + } + } + + return total_nr; +} + +/* Find all pages tracked by lru_gen for this memcg. */ +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats) +{ + return lru_gen_sum_memcg_stats_for_gen(-1, stats); +} + +/* + * If lru_gen aging should force page table scanning. + * + * If you want to set this to false, you will need to do eviction + * before doing extra aging passes. + */ +static const bool force_scan = true; + +static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen) +{ + FILE *f = fopen(LRU_GEN_DEBUGFS, "w"); + char *command; + size_t sz; + + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + sz = asprintf(&command, "+ %lu %d %d 1 %d\n", + memcg_id, node_id, max_gen, force_scan); + TEST_ASSERT(sz > 0, "creating aging command failed"); + + pr_debug("Running aging command: %s", command); + if (fwrite(command, sizeof(char), sz, f) < sz) { + TEST_ASSERT(false, "writing aging command %s to %s failed", + command, LRU_GEN_DEBUGFS); + } + + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); +} + +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg) +{ + int node, gen; + + pr_debug("lru_gen: invoking aging...\n"); + + /* Must read memcg stats to construct the proper aging command. */ + lru_gen_read_memcg_stats(stats, memcg); + + for (node = 0; node < stats->nr_nodes; ++node) { + int max_gen = 0; + + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + int this_gen = stats->nodes[node].gens[gen].gen; + + max_gen = max_gen > this_gen ? max_gen : this_gen; + } + + run_aging_impl(stats->memcg_id, stats->nodes[node].node, + max_gen); + } + + /* Re-read so callers get updated information */ + lru_gen_read_memcg_stats(stats, memcg); +} + +/* + * Find which generation contains at least @pages pages, assuming that + * such a generation exists. + */ +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long pages) +{ + int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1; + + for (node = 0; node < stats->nr_nodes; ++node) + for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens; + ++gen_idx) { + gen = stats->nodes[node].gens[gen_idx].gen; + max_gen = gen > max_gen ? gen : max_gen; + min_gen = gen < min_gen ? gen : min_gen; + } + + for (gen = min_gen; gen <= max_gen; ++gen) + /* See if this generation has enough pages. */ + if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages) + return gen; + + return -1; +} + +bool lru_gen_usable(void) +{ + long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK; + int lru_gen_fd, lru_gen_debug_fd; + char mglru_feature_str[8] = {}; + long mglru_features; + + lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY); + if (lru_gen_fd < 0) { + puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH); + return false; + } + if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) { + puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH); + close(lru_gen_fd); + return false; + } + close(lru_gen_fd); + + mglru_features = strtol(mglru_feature_str, NULL, 16); + if ((mglru_features & required_features) != required_features) { + printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n", + mglru_features, required_features); + printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n", + required_features); + return false; + } + + lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR); + __TEST_REQUIRE(lru_gen_debug_fd >= 0, + "lru_gen: Could not open " LRU_GEN_DEBUGFS ", " + "but lru_gen is enabled, so cannot use page_idle."); + close(lru_gen_debug_fd); + return true; +} diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index df457452d146..557c0a0a5658 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -2,14 +2,13 @@ /* * Copyright (C) 2020, Google LLC. */ -#define _GNU_SOURCE - #include <inttypes.h> #include <linux/bitmap.h> #include "kvm_util.h" #include "memstress.h" #include "processor.h" +#include "ucall_common.h" struct memstress_args memstress_args; @@ -56,7 +55,7 @@ void memstress_guest_code(uint32_t vcpu_idx) uint64_t page; int i; - rand_state = new_guest_random_state(args->random_seed + vcpu_idx); + rand_state = new_guest_random_state(guest_random_seed + vcpu_idx); gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -76,7 +75,7 @@ void memstress_guest_code(uint32_t vcpu_idx) addr = gva + (page * args->guest_page_size); - if (guest_random_u32(&rand_state) % 100 < args->write_percent) + if (__guest_random_bool(&rand_state, args->write_percent)) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -168,7 +167,8 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, * The memory is also added to memslot 0, but that's a benign side * effect as KVM allows aliasing HVAs in meslots. */ - vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, + vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, + slot0_pages + guest_num_pages, memstress_guest_code, vcpus); args->vm = vm; @@ -191,7 +191,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, TEST_ASSERT(guest_num_pages < region_end_gfn, "Requested more guest memory than address space allows.\n" " guest pages: %" PRIx64 " max gfn: %" PRIx64 - " nr_vcpus: %d wss: %" PRIx64 "]\n", + " nr_vcpus: %d wss: %" PRIx64 "]", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; @@ -242,12 +242,6 @@ void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) sync_global_to_guest(vm, memstress_args.write_percent); } -void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) -{ - memstress_args.random_seed = random_seed; - sync_global_to_guest(vm, memstress_args.random_seed); -} - void memstress_set_random_access(struct kvm_vm *vm, bool random_access) { memstress_args.random_access = random_access; @@ -271,7 +265,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S new file mode 100644 index 000000000000..b787b982e922 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Intel Corporation + */ + +#ifndef __ASSEMBLY__ +#define __ASSEMBLY__ +#endif + +#include <asm/csr.h> + +.macro save_context + addi sp, sp, (-8*36) + sd x1, 8(sp) + sd x2, 16(sp) + sd x3, 24(sp) + sd x4, 32(sp) + sd x5, 40(sp) + sd x6, 48(sp) + sd x7, 56(sp) + sd x8, 64(sp) + sd x9, 72(sp) + sd x10, 80(sp) + sd x11, 88(sp) + sd x12, 96(sp) + sd x13, 104(sp) + sd x14, 112(sp) + sd x15, 120(sp) + sd x16, 128(sp) + sd x17, 136(sp) + sd x18, 144(sp) + sd x19, 152(sp) + sd x20, 160(sp) + sd x21, 168(sp) + sd x22, 176(sp) + sd x23, 184(sp) + sd x24, 192(sp) + sd x25, 200(sp) + sd x26, 208(sp) + sd x27, 216(sp) + sd x28, 224(sp) + sd x29, 232(sp) + sd x30, 240(sp) + sd x31, 248(sp) + csrr s0, CSR_SEPC + csrr s1, CSR_SSTATUS + csrr s2, CSR_STVAL + csrr s3, CSR_SCAUSE + sd s0, 0(sp) + sd s1, 256(sp) + sd s2, 264(sp) + sd s3, 272(sp) +.endm + +.macro restore_context + ld s3, 272(sp) + ld s2, 264(sp) + ld s1, 256(sp) + ld s0, 0(sp) + csrw CSR_SCAUSE, s3 + csrw CSR_SSTATUS, s1 + csrw CSR_SEPC, s0 + ld x31, 248(sp) + ld x30, 240(sp) + ld x29, 232(sp) + ld x28, 224(sp) + ld x27, 216(sp) + ld x26, 208(sp) + ld x25, 200(sp) + ld x24, 192(sp) + ld x23, 184(sp) + ld x22, 176(sp) + ld x21, 168(sp) + ld x20, 160(sp) + ld x19, 152(sp) + ld x18, 144(sp) + ld x17, 136(sp) + ld x16, 128(sp) + ld x15, 120(sp) + ld x14, 112(sp) + ld x13, 104(sp) + ld x12, 96(sp) + ld x11, 88(sp) + ld x10, 80(sp) + ld x9, 72(sp) + ld x8, 64(sp) + ld x7, 56(sp) + ld x6, 48(sp) + ld x5, 40(sp) + ld x4, 32(sp) + ld x3, 24(sp) + ld x2, 16(sp) + ld x1, 8(sp) + addi sp, sp, (8*36) +.endm + +.balign 4 +.global exception_vectors +exception_vectors: + save_context + move a0, sp + call route_exception + restore_context + sret diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d146ca71e0c0..2eac7d4b59e9 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -10,9 +10,22 @@ #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 +static vm_vaddr_t exception_handlers; + +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) +{ + unsigned long value = 0; + int ret; + + ret = __vcpu_get_reg(vcpu, ext, &value); + + return !ret && !!value; +} + static uint64_t page_align(struct kvm_vm *vm, uint64_t v) { return (v + vm->page_size) & ~(vm->page_size - 1); @@ -201,46 +214,46 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; satp |= SATP_MODE_48; - vcpu_set_reg(vcpu, RISCV_CSR_REG(satp), satp); + vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_riscv_core core; - vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); - vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); + core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); + core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); + core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); + core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); + core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); + core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); + core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1)); + core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); + core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); + core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); + core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); + core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); + core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); + core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); + core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); + core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); + core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); + core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); + core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); + core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); + core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); + core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); + core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); + core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); + core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); + core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); + core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); + core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); + core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); + core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); + core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); + core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); fprintf(stream, " MODE: 0x%lx\n", core.mode); @@ -277,8 +290,12 @@ static void __aligned(16) guest_unexp_trap(void) 0, 0, 0, 0, 0, 0); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { int r; size_t stack_size; @@ -312,10 +329,12 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, /* Setup stack pointer and program counter of guest */ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); - vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); + + /* Setup sscratch for guest_get_vcpuid() */ + vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id); /* Setup default exception vector of guest */ - vcpu_set_reg(vcpu, RISCV_CSR_REG(stvec), (unsigned long)guest_unexp_trap); + vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap); return vcpu; } @@ -327,7 +346,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); @@ -364,6 +383,135 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) va_end(ap); } +void kvm_exit_unexpected_exception(int vector, int ec) +{ + ucall(UCALL_UNHANDLED, 2, vector, ec); +} + void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { + struct ucall uc; + + if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { + TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", + uc.args[0], uc.args[1]); + } +} + +struct handlers { + exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; +}; + +void route_exception(struct pt_regs *regs) +{ + struct handlers *handlers = (struct handlers *)exception_handlers; + int vector = 0, ec; + + ec = regs->cause & ~CAUSE_IRQ_FLAG; + if (ec >= NR_EXCEPTIONS) + goto unexpected_exception; + + /* Use the same handler for all the interrupts */ + if (regs->cause & CAUSE_IRQ_FLAG) { + vector = 1; + ec = 0; + } + + if (handlers && handlers->exception_handlers[vector][ec]) + return handlers->exception_handlers[vector][ec](regs); + +unexpected_exception: + return kvm_exit_unexpected_exception(vector, ec); +} + +void vcpu_init_vector_tables(struct kvm_vcpu *vcpu) +{ + extern char exception_vectors; + + vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors); +} + +void vm_init_vector_tables(struct kvm_vm *vm) +{ + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, MEM_REGION_DATA); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(vector < NR_EXCEPTIONS); + handlers->exception_handlers[0][vector] = handler; +} + +void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + handlers->exception_handlers[1][0] = handler; +} + +uint32_t guest_get_vcpuid(void) +{ + return csr_read(CSR_SSCRATCH); +} + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + struct sbiret ret; + + asm volatile ( + "ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} + +bool guest_sbi_probe_extension(int extid, long *out_val) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, + 0, 0, 0, 0, 0); + + __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED, + "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value); + + if (ret.error == SBI_ERR_NOT_SUPPORTED) + return false; + + if (out_val) + *out_val = ret.value; + + return true; +} + +unsigned long get_host_sbi_spec_version(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0, + 0, 0, 0, 0, 0); + + GUEST_ASSERT(!ret.error); + + return ret.value; } diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 9a3476a2dfca..b5035c63d516 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -9,43 +9,7 @@ #include "kvm_util.h" #include "processor.h" - -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); - register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); - register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); - register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); - register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); - register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); - struct sbiret ret; - - asm volatile ( - "ecall" - : "+r" (a0), "+r" (a1) - : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) - : "memory"); - ret.error = a0; - ret.value = a1; - - return ret; -} - -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, - KVM_RISCV_SELFTESTS_SBI_UCALL, - uc, 0, 0, 0, 0, 0); -} +#include "sbi.h" void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..2c432fa164f1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c new file mode 100644 index 000000000000..d540812d911a --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390/facility.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Contains the definition for the global variables to have the test facitlity feature. + */ + +#include "facility.h" + +uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 15945121daf1..8ceeb17c819a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -14,7 +14,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); if (vm->pgd_created) @@ -79,7 +79,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) } /* Fill in page table entry */ - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ if (!(entry[idx] & PAGE_INVALID)) fprintf(stderr, "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); @@ -91,7 +91,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) int ri, idx; uint64_t *entry; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); entry = addr_gpa2hva(vm, vm->pgd); @@ -103,7 +103,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); } - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ TEST_ASSERT(!(entry[idx] & PAGE_INVALID), "No page mapping for vm virtual address 0x%lx", gva); @@ -155,17 +155,20 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) virt_dump_region(stream, vm, indent, vm->pgd); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + vcpu->run->psw_addr = (uintptr_t)guest_code; +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); uint64_t stack_vaddr; struct kvm_regs regs; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; - struct kvm_run *run; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); stack_vaddr = __vm_vaddr_alloc(vm, stack_size, @@ -184,9 +187,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ vcpu_sregs_set(vcpu, &sregs); - run = vcpu->run; - run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ - run->psw_addr = (uintptr_t)guest_code; + vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ return vcpu; } @@ -198,7 +199,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); @@ -220,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c index a7f02dc372cf..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390/ucall.c @@ -6,16 +6,6 @@ */ #include "kvm_util.h" -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 50e0cf41a7dd..a99188f87a38 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -202,7 +202,7 @@ static sparsebit_num_t node_num_set(struct node *nodep) /* Returns a pointer to the node that describes the * lowest bit index. */ -static struct node *node_first(struct sparsebit *s) +static struct node *node_first(const struct sparsebit *s) { struct node *nodep; @@ -216,7 +216,7 @@ static struct node *node_first(struct sparsebit *s) * lowest bit index > the index of the node pointed to by np. * Returns NULL if no node with a higher index exists. */ -static struct node *node_next(struct sparsebit *s, struct node *np) +static struct node *node_next(const struct sparsebit *s, struct node *np) { struct node *nodep = np; @@ -244,7 +244,7 @@ static struct node *node_next(struct sparsebit *s, struct node *np) * highest index < the index of the node pointed to by np. * Returns NULL if no node with a lower index exists. */ -static struct node *node_prev(struct sparsebit *s, struct node *np) +static struct node *node_prev(const struct sparsebit *s, struct node *np) { struct node *nodep = np; @@ -273,7 +273,7 @@ static struct node *node_prev(struct sparsebit *s, struct node *np) * subtree and duplicates the bit settings to the newly allocated nodes. * Returns the newly allocated copy of subtree. */ -static struct node *node_copy_subtree(struct node *subtree) +static struct node *node_copy_subtree(const struct node *subtree) { struct node *root; @@ -307,7 +307,7 @@ static struct node *node_copy_subtree(struct node *subtree) * index is within the bits described by the mask bits or the number of * contiguous bits set after the mask. Returns NULL if there is no such node. */ -static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) +static struct node *node_find(const struct sparsebit *s, sparsebit_idx_t idx) { struct node *nodep; @@ -393,7 +393,7 @@ static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) } /* Returns whether all the bits in the sparsebit array are set. */ -bool sparsebit_all_set(struct sparsebit *s) +bool sparsebit_all_set(const struct sparsebit *s) { /* * If any nodes there must be at least one bit set. Only case @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ @@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep) tmp = node_prev(s, nodep); node_rm(s, nodep); - nodep = NULL; nodep = tmp; reduction_performed = true; @@ -776,7 +775,7 @@ static void node_reduce(struct sparsebit *s, struct node *nodep) /* Returns whether the bit at the index given by idx, within the * sparsebit array is set or not. */ -bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) +bool sparsebit_is_set(const struct sparsebit *s, sparsebit_idx_t idx) { struct node *nodep; @@ -922,7 +921,7 @@ static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) * used by test cases after they detect an unexpected condition, as a means * to capture diagnostic information. */ -static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, +static void sparsebit_dump_internal(FILE *stream, const struct sparsebit *s, unsigned int indent) { /* Dump the contents of s */ @@ -970,7 +969,7 @@ void sparsebit_free(struct sparsebit **sbitp) * sparsebit_alloc(). It can though already have bits set, which * if different from src will be cleared. */ -void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) +void sparsebit_copy(struct sparsebit *d, const struct sparsebit *s) { /* First clear any bits already set in the destination */ sparsebit_clear_all(d); @@ -982,7 +981,7 @@ void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) } /* Returns whether num consecutive bits starting at idx are all set. */ -bool sparsebit_is_set_num(struct sparsebit *s, +bool sparsebit_is_set_num(const struct sparsebit *s, sparsebit_idx_t idx, sparsebit_num_t num) { sparsebit_idx_t next_cleared; @@ -1006,14 +1005,14 @@ bool sparsebit_is_set_num(struct sparsebit *s, } /* Returns whether the bit at the index given by idx. */ -bool sparsebit_is_clear(struct sparsebit *s, +bool sparsebit_is_clear(const struct sparsebit *s, sparsebit_idx_t idx) { return !sparsebit_is_set(s, idx); } /* Returns whether num consecutive bits starting at idx are all cleared. */ -bool sparsebit_is_clear_num(struct sparsebit *s, +bool sparsebit_is_clear_num(const struct sparsebit *s, sparsebit_idx_t idx, sparsebit_num_t num) { sparsebit_idx_t next_set; @@ -1042,13 +1041,13 @@ bool sparsebit_is_clear_num(struct sparsebit *s, * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, * to determine if the sparsebit array has any bits set. */ -sparsebit_num_t sparsebit_num_set(struct sparsebit *s) +sparsebit_num_t sparsebit_num_set(const struct sparsebit *s) { return s->num_set; } /* Returns whether any bit is set in the sparsebit array. */ -bool sparsebit_any_set(struct sparsebit *s) +bool sparsebit_any_set(const struct sparsebit *s) { /* * Nodes only describe set bits. If any nodes then there @@ -1071,20 +1070,20 @@ bool sparsebit_any_set(struct sparsebit *s) } /* Returns whether all the bits in the sparsebit array are cleared. */ -bool sparsebit_all_clear(struct sparsebit *s) +bool sparsebit_all_clear(const struct sparsebit *s) { return !sparsebit_any_set(s); } /* Returns whether all the bits in the sparsebit array are set. */ -bool sparsebit_any_clear(struct sparsebit *s) +bool sparsebit_any_clear(const struct sparsebit *s) { return !sparsebit_all_set(s); } /* Returns the index of the first set bit. Abort if no bits are set. */ -sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) +sparsebit_idx_t sparsebit_first_set(const struct sparsebit *s) { struct node *nodep; @@ -1098,7 +1097,7 @@ sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) /* Returns the index of the first cleared bit. Abort if * no bits are cleared. */ -sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) +sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *s) { struct node *nodep1, *nodep2; @@ -1152,7 +1151,7 @@ sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) /* Returns index of next bit set within s after the index given by prev. * Returns 0 if there are no bits after prev that are set. */ -sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, +sparsebit_idx_t sparsebit_next_set(const struct sparsebit *s, sparsebit_idx_t prev) { sparsebit_idx_t lowest_possible = prev + 1; @@ -1245,7 +1244,7 @@ sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, /* Returns index of next bit cleared within s after the index given by prev. * Returns 0 if there are no bits after prev that are cleared. */ -sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, +sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *s, sparsebit_idx_t prev) { sparsebit_idx_t lowest_possible = prev + 1; @@ -1301,7 +1300,7 @@ sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, * and returns the index of the first sequence of num consecutively set * bits. Returns a value of 0 of no such sequence exists. */ -sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, +sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *s, sparsebit_idx_t start, sparsebit_num_t num) { sparsebit_idx_t idx; @@ -1336,7 +1335,7 @@ sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, * and returns the index of the first sequence of num consecutively cleared * bits. Returns a value of 0 of no such sequence exists. */ -sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, +sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *s, sparsebit_idx_t start, sparsebit_num_t num) { sparsebit_idx_t idx; @@ -1584,7 +1583,7 @@ static size_t display_range(FILE *stream, sparsebit_idx_t low, * contiguous bits. This is done because '-' is used to specify command-line * options, and sometimes ranges are specified as command-line arguments. */ -void sparsebit_dump(FILE *stream, struct sparsebit *s, +void sparsebit_dump(FILE *stream, const struct sparsebit *s, unsigned int indent) { size_t current_line_len = 0; @@ -1682,7 +1681,7 @@ void sparsebit_dump(FILE *stream, struct sparsebit *s, * s. On error, diagnostic information is printed to stderr and * abort is called. */ -void sparsebit_validate_internal(struct sparsebit *s) +void sparsebit_validate_internal(const struct sparsebit *s) { bool error_detected = false; struct node *nodep, *prev = NULL; diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c index 632398adc229..5d1c87277c49 100644 --- a/tools/testing/selftests/kvm/lib/string_override.c +++ b/tools/testing/selftests/kvm/lib/string_override.c @@ -37,3 +37,12 @@ void *memset(void *s, int c, size_t count) *xs++ = c; return s; } + +size_t strnlen(const char *s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index b772193f6c18..8a1848586a85 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,7 +4,8 @@ * * Copyright (C) 2020, Google LLC. */ - +#include <stdio.h> +#include <stdarg.h> #include <assert.h> #include <ctype.h> #include <limits.h> @@ -17,6 +18,13 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. @@ -131,37 +139,57 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); } -bool thp_configured(void) +static bool test_sysfs_path(const char *path) { - int ret; struct stat statbuf; + int ret; - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + ret = stat(path, &statbuf); TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "Error in stating /sys/kernel/mm/transparent_hugepage"); + "Error in stat()ing '%s'", path); return ret == 0; } -size_t get_trans_hugepagesz(void) +bool thp_configured(void) +{ + return test_sysfs_path("/sys/kernel/mm/transparent_hugepage"); +} + +static size_t get_sysfs_val(const char *path) { size_t size; FILE *f; int ret; - TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); - - f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); - TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + f = fopen(path, "r"); + TEST_ASSERT(f, "Error opening '%s'", path); ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret > 0, "Error reading '%s'", path); + + /* Re-scan the input stream to verify the entire file was read. */ ret = fscanf(f, "%ld", &size); - TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); - fclose(f); + TEST_ASSERT(ret < 1, "Error reading '%s'", path); + fclose(f); return size; } +size_t get_trans_hugepagesz(void) +{ + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); +} + +bool is_numa_balancing_enabled(void) +{ + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) + return false; + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; +} + size_t get_def_hugetlb_pagesz(void) { char buf[64]; @@ -377,3 +405,40 @@ int atoi_paranoid(const char *num_str) return num; } + +char *strdup_printf(const char *fmt, ...) +{ + va_list ap; + char *str; + + va_start(ap, fmt); + TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed"); + va_end(ap); + + return str; +} + +#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" + +char *sys_get_cur_clocksource(void) +{ + char *clk_name; + struct stat st; + FILE *fp; + + fp = fopen(CLOCKSOURCE_PATH, "r"); + TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno); + + TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d", + errno); + + clk_name = malloc(st.st_size); + TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); + + TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d", + ferror(fp)); + + fclose(fp); + + return clk_name; +} diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 2f0e2ea941cc..42151e571953 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "kvm_util.h" #include "linux/types.h" #include "linux/bitmap.h" #include "linux/atomic.h" +#include "kvm_util.h" +#include "ucall_common.h" + + #define GUEST_UCALL_FAILED -1 struct ucall_header { @@ -11,6 +14,11 @@ struct ucall_header { struct ucall ucalls[KVM_MAX_VCPUS]; }; +int ucall_nr_pages_required(uint64_t page_size) +{ + return align_up(sizeof(struct ucall_header), page_size) / page_size; +} + /* * ucall_pool holds per-VM values (global data is duplicated by each VM), it * must not be accessed from host code. @@ -24,7 +32,8 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) vm_vaddr_t vaddr; int i; - vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA); + vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, + MEM_REGION_DATA); hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); memset(hdr, 0, sizeof(*hdr)); @@ -70,6 +79,45 @@ static void ucall_free(struct ucall *uc) clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); } +void ucall_assert(uint64_t cmd, const char *exp, const char *file, + unsigned int line, const char *fmt, ...) +{ + struct ucall *uc; + va_list va; + + uc = ucall_alloc(); + uc->cmd = cmd; + + WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp)); + WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file)); + WRITE_ONCE(uc->args[GUEST_LINE], line); + + va_start(va, fmt); + guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + +void ucall_fmt(uint64_t cmd, const char *fmt, ...) +{ + struct ucall *uc; + va_list va; + + uc = ucall_alloc(); + uc->cmd = cmd; + + va_start(va, fmt); + guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + void ucall(uint64_t cmd, int nargs, ...) { struct ucall *uc; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 271f63891581..5bde176cedd5 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -6,9 +6,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019-2022 Google LLC */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <stdio.h> #include <stdlib.h> @@ -16,6 +13,7 @@ #include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> +#include <sys/epoll.h> #include <sys/syscall.h> #include "kvm_util.h" @@ -27,76 +25,69 @@ static void *uffd_handler_thread_fn(void *arg) { - struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; - int uffd = uffd_desc->uffd; - int pipefd = uffd_desc->pipefds[0]; - useconds_t delay = uffd_desc->delay; + struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; + int uffd = reader_args->uffd; int64_t pages = 0; struct timespec start; struct timespec ts_diff; + struct epoll_event evt; + int epollfd; + + epollfd = epoll_create(1); + TEST_ASSERT(epollfd >= 0, "Failed to create epollfd."); + + evt.events = EPOLLIN | EPOLLEXCLUSIVE; + evt.data.u32 = 0; + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt), + "Failed to add uffd to epollfd"); + + evt.events = EPOLLIN; + evt.data.u32 = 1; + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt), + "Failed to add pipe to epollfd"); clock_gettime(CLOCK_MONOTONIC, &start); while (1) { struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; int r; - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; + r = epoll_wait(epollfd, &evt, 1, -1); + TEST_ASSERT(r == 1, + "Unexpected number of events (%d) from epoll, errno = %d", + r, errno); - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } + if (evt.data.u32 == 1) { + char tmp_chr; - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on pipe."); + r = read(reader_args->pipe, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); + "Error reading pipefd in uffd reader thread"); break; } - if (!(pollfd[0].revents & POLLIN)) - continue; + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); r = read(uffd, &msg, sizeof(msg)); if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; + TEST_ASSERT(errno == EAGAIN, + "Error reading from UFFD: errno = %d", errno); + continue; } - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } + TEST_ASSERT(r == sizeof(msg), + "Read on uffd returned unexpected number of bytes (%d)", r); if (!(msg.event & UFFD_EVENT_PAGEFAULT)) continue; - if (delay) - usleep(delay); - r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); - if (r < 0) - return NULL; + if (reader_args->delay) + usleep(reader_args->delay); + r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); + TEST_ASSERT(r >= 0, + "Reader thread handler fn returned negative value %d", r); pages++; } @@ -110,6 +101,7 @@ static void *uffd_handler_thread_fn(void *arg) struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void *hva, uint64_t len, + uint64_t num_readers, uffd_handler_t handler) { struct uffd_desc *uffd_desc; @@ -118,14 +110,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; + int ret, i; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); - TEST_ASSERT(uffd_desc, "malloc failed"); + TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); + + uffd_desc->pipefds = calloc(sizeof(int), num_readers); + TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes"); + + uffd_desc->readers = calloc(sizeof(pthread_t), num_readers); + TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads"); + + uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers); + TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args"); + + uffd_desc->num_readers = num_readers; /* In order to get minor faults, prefault via the alias. */ if (is_minor) @@ -148,18 +151,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == expected_ioctls, "missing userfaultfd ioctls"); - ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!ret, "Failed to set up pipefd"); - - uffd_desc->uffd_mode = uffd_mode; uffd_desc->uffd = uffd; - uffd_desc->delay = delay; - uffd_desc->handler = handler; - pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, - uffd_desc); + for (i = 0; i < uffd_desc->num_readers; ++i) { + int pipes[2]; - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); + ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p", + i, uffd_desc); + + uffd_desc->pipefds[i] = pipes[1]; + + uffd_desc->reader_args[i].uffd_mode = uffd_mode; + uffd_desc->reader_args[i].uffd = uffd; + uffd_desc->reader_args[i].delay = delay; + uffd_desc->reader_args[i].handler = handler; + uffd_desc->reader_args[i].pipe = pipes[0]; + + pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn, + &uffd_desc->reader_args[i]); + + PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n", + i, hva, hva + len); + } return uffd_desc; } @@ -167,19 +180,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void uffd_stop_demand_paging(struct uffd_desc *uffd) { char c = 0; - int ret; + int i; - ret = write(uffd->pipefds[1], &c, 1); - TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + for (i = 0; i < uffd->num_readers; ++i) + TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1, + "Unable to write to pipefd %i for uffd_desc %p", i, uffd); - ret = pthread_join(uffd->thread, NULL); - TEST_ASSERT(ret == 0, "Pthread_join failed."); + for (i = 0; i < uffd->num_readers; ++i) + TEST_ASSERT(!pthread_join(uffd->readers[i], NULL), + "Pthread_join failed on reader %i for uffd_desc %p", i, uffd); close(uffd->uffd); - close(uffd->pipefds[1]); - close(uffd->pipefds[0]); + for (i = 0; i < uffd->num_readers; ++i) { + close(uffd->pipefds[i]); + close(uffd->reader_args[i].pipe); + } + free(uffd->pipefds); + free(uffd->readers); + free(uffd->reader_args); free(uffd); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 7168e25c194e..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S index 7629819734af..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86/handlers.S diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c new file mode 100644 index 000000000000..15bc8cd583aa --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hyper-V specific functions. + * + * Copyright (C) 2021, Red Hat Inc. + */ +#include <stdint.h> +#include "processor.h" +#include "hyperv.h" + +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) +{ + static struct kvm_cpuid2 *cpuid_full; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_init_cpuid(vcpu, cpuid_full); +} + +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) +{ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) + return false; + + return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); +} + +struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, + vm_vaddr_t *p_hv_pages_gva) +{ + vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); + + /* Setup of a region of guest memory for the VP Assist page. */ + hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); + hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); + + /* Setup of a region of guest memory for the partition assist page. */ + hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); + hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); + hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); + + *p_hv_pages_gva = hv_pages_gva; + return hv; +} + +int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + return 0; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index d61e623afc8c..0b1f288ad556 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * x86_64-specific extensions to memstress.c. + * x86-specific extensions to memstress.c. * * Copyright (C) 2022, Google, Inc. */ @@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); /* * Identity map the first 4G and the test region with 1G pages so that diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c new file mode 100644 index 000000000000..34cb57d1d671 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Tencent, Inc. + */ + +#include <stdint.h> + +#include <linux/kernel.h> + +#include "kvm_util.h" +#include "processor.h" +#include "pmu.h" + +const uint64_t intel_pmu_arch_events[] = { + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + INTEL_ARCH_TOPDOWN_SLOTS, + INTEL_ARCH_TOPDOWN_BE_BOUND, + INTEL_ARCH_TOPDOWN_BAD_SPEC, + INTEL_ARCH_TOPDOWN_FE_BOUND, + INTEL_ARCH_TOPDOWN_RETIRING, + INTEL_ARCH_LBR_INSERTS, +}; +kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); + +const uint64_t amd_pmu_zen_events[] = { + AMD_ZEN_CORE_CYCLES, + AMD_ZEN_INSTRUCTIONS_RETIRED, + AMD_ZEN_BRANCHES_RETIRED, + AMD_ZEN_BRANCHES_MISPREDICTED, +}; +kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); + +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. + * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms before Sierra Forest (including Sierra Forest), + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would + * be overcounted on these certain instructions, but for Clearwater Forest + * only "Instruction Retired" event is overcounted on these instructions. + */ +static uint64_t get_pmu_errata(void) +{ + if (!this_cpu_is_intel()) + return 0; + + if (this_cpu_family() != 0x6) + return 0; + + switch (this_cpu_model()) { + case 0xDD: /* Clearwater Forest */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT); + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) | + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT); + default: + return 0; + } +} + +uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void) +{ + pmu_errata_mask = get_pmu_errata(); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index d4a0b504b1e0..36104d27f3d9 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1,27 +1,61 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2018, Google LLC. */ #include "linux/bitmap.h" #include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" +#include "sev.h" #ifndef NUM_INTERRUPTS #define NUM_INTERRUPTS 256 #endif -#define DEFAULT_CODE_SELECTOR 0x8 -#define DEFAULT_DATA_SELECTOR 0x10 - -#define MAX_NR_CPUID_ENTRIES 100 +#define KERNEL_CS 0x8 +#define KERNEL_DS 0x10 +#define KERNEL_TSS 0x18 vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; +bool is_forced_emulation_enabled; +uint64_t guest_tsc_khz; + +const char *ex_str(int vector) +{ + switch (vector) { +#define VEC_STR(v) case v##_VECTOR: return "#" #v + case DE_VECTOR: return "no exception"; + case KVM_MAGIC_DE_VECTOR: return "#DE"; + VEC_STR(DB); + VEC_STR(NMI); + VEC_STR(BP); + VEC_STR(OF); + VEC_STR(BR); + VEC_STR(UD); + VEC_STR(NM); + VEC_STR(DF); + VEC_STR(TS); + VEC_STR(NP); + VEC_STR(SS); + VEC_STR(GP); + VEC_STR(PF); + VEC_STR(MF); + VEC_STR(AC); + VEC_STR(MC); + VEC_STR(XM); + VEC_STR(VE); + VEC_STR(CP); + VEC_STR(HV); + VEC_STR(VC); + VEC_STR(SX); + default: return "#??"; +#undef VEC_STR + } +} static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -124,10 +158,10 @@ bool kvm_is_tdp_enabled(void) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); - /* If needed, create page map l4 table. */ + /* If needed, create the top-level page table. */ if (!vm->pgd_created) { vm->pgd = vm_alloc_page_table(vm); vm->pgd_created = true; @@ -157,6 +191,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, { uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); + paddr = vm_untag_gpa(vm, paddr); + if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; if (current_level == target_level) @@ -170,10 +206,10 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", + "Cannot create hugepage at level: %u, vaddr: 0x%lx", current_level, vaddr); TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx\n", + "Cannot create page table at level: %u, vaddr: 0x%lx", current_level, vaddr); } return pte; @@ -182,11 +218,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) { const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + uint64_t *pte = &vm->pgd; + int current_level; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % pg_size) == 0, "Virtual address not aligned,\n" @@ -200,28 +236,36 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) "Physical address beyond maximum supported,\n" " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, + "Unexpected bits in paddr: %lx", paddr); /* * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; + for (current_level = vm->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_create_upper_pte(vm, pte, vaddr, paddr, + current_level, level); + if (*pte & PTE_LARGE_MASK) + return; + } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); + "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); + + /* + * Neither SEV nor TDX supports shared page tables, so only the final + * leaf PTE needs manually set the C/S-bit. + */ + if (vm_is_gpa_protected(vm, paddr)) + *pte |= vm->arch.c_bit; + else + *pte |= vm->arch.s_bit; } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -242,6 +286,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, for (i = 0; i < nr_pages; i++) { __virt_pg_map(vm, vaddr, paddr, level); + sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, + nr_bytes / PAGE_SIZE); vaddr += pg_size; paddr += pg_size; @@ -253,7 +299,7 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) if (*pte & PTE_LARGE_MASK) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, - "Unexpected hugepage at level %d\n", current_level); + "Unexpected hugepage at level %d", current_level); *level = current_level; } @@ -263,37 +309,38 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level) { - uint64_t *pml4e, *pdpe, *pde; + int va_width = 12 + (vm->pgtable_levels) * 9; + uint64_t *pte = &vm->pgd; + int current_level; - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + TEST_ASSERT(!vm->arch.is_pt_protected, + "Walking page tables of protected guests is impossible"); + + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= vm->pgtable_levels, "Invalid PG_LEVEL_* '%d'", *level); - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), "Invalid virtual address, vaddr: 0x%lx", vaddr); /* - * Based on the mode check above there are 48 bits in the vaddr, so - * shift 16 to sign extend the last bit (bit-47), + * Check that the vaddr is a sign-extended va_width value. */ - TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), - "Canonical check failed. The virtual address is invalid."); - - pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; - - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; - - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; + TEST_ASSERT(vaddr == + (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))), + "Canonical check failed. The virtual address is invalid."); + + for (current_level = vm->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_get_pte(vm, pte, vaddr, current_level); + if (vm_is_target_pte(pte, level, current_level)) + return pte; + } - return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + return virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); } uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) @@ -400,7 +447,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp) static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) { - void *gdt = addr_gva2hva(vm, vm->gdt); + void *gdt = addr_gva2hva(vm, vm->arch.gdt); struct desc64 *desc = gdt + (segp->selector >> 3) * 8; desc->limit0 = segp->limit & 0xFFFF; @@ -420,27 +467,10 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->base3 = segp->base >> 32; } - -/* - * Set Long Mode Flat Kernel Code Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by @segp, to be a code segment - * with the selector value given by @selector. - */ -static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) +static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); - segp->selector = selector; + segp->selector = KERNEL_CS; segp->limit = 0xFFFFFFFFu; segp->s = 0x1; /* kTypeCodeData */ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed @@ -449,30 +479,12 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, segp->g = true; segp->l = true; segp->present = 1; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); } -/* - * Set Long Mode Flat Kernel Data Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by @segp, to be a data segment - * with the selector value given by @selector. - */ -static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) +static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); - segp->selector = selector; + segp->selector = KERNEL_DS; segp->limit = 0xFFFFFFFFu; segp->s = 0x1; /* kTypeCodeData */ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed @@ -480,8 +492,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, */ segp->g = true; segp->present = true; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); } vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) @@ -496,74 +506,196 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) * No need for a hugepage mask on the PTE, x86-64 requires the "unused" * address bits to be zero. */ - return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level)); + return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); } -static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) +static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) { - if (!vm->gdt) - vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - - dt->base = vm->gdt; - dt->limit = getpagesize(); -} - -static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, - int selector) -{ - if (!vm->tss) - vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - memset(segp, 0, sizeof(*segp)); - segp->base = vm->tss; + segp->base = base; segp->limit = 0x67; - segp->selector = selector; + segp->selector = KERNEL_TSS; segp->type = 0xb; segp->present = 1; - kvm_seg_fill_gdt_64bit(vm, segp); } -static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); + /* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); - sregs.idt.limit = 0; + sregs.idt.base = vm->arch.idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->arch.gdt; + sregs.gdt.limit = getpagesize() - 1; + + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; + if (vm->pgtable_levels == 5) + sregs.cr4 |= X86_CR4_LA57; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(&sregs.cs); + kvm_seg_set_kernel_data_64bit(&sregs.ds); + kvm_seg_set_kernel_data_64bit(&sregs.es); + kvm_seg_set_kernel_data_64bit(&sregs.gs); + kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vcpu, &sregs); +} + +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +static bool kvm_fixup_exception(struct ex_regs *regs) +{ + if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) + return false; - kvm_setup_gdt(vm, &sregs.gdt); + if (regs->vector == DE_VECTOR) + regs->vector = KVM_MAGIC_DE_VECTOR; - switch (vm->mode) { - case VM_MODE_PXXV48_4K: - sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; - sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + regs->rip = regs->r11; + regs->r9 = regs->vector; + regs->r10 = regs->error_code; + return true; +} - kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); - kvm_setup_tss_64bit(vm, &sregs.tr, 0x18); - break; +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; - default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; } - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vcpu, &sregs); + if (kvm_fixup_exception(regs)) + return; + + GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); +} + +static void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + struct kvm_segment seg; + int i; + + vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + + kvm_seg_set_kernel_code_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_kernel_data_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_tss_64bit(vm->arch.tss, &seg); + kvm_seg_fill_gdt_64bit(vm, &seg); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; } -void kvm_arch_vm_post_create(struct kvm_vm *vm) +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { + struct ucall uc; + + if (get_ucall(vcpu, &uc) == UCALL_ABORT) + REPORT_GUEST_ASSERT(uc); +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + int r; + + TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ), + "Require KVM_GET_TSC_KHZ to provide udelay() to guest."); + vm_create_irqchip(vm); + vm_init_descriptor_tables(vm); + sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); + sync_global_to_guest(vm, is_forced_emulation_enabled); + sync_global_to_guest(vm, pmu_errata_mask); + + if (is_sev_vm(vm)) { + struct kvm_sev_init init = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } + + r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency."); + guest_tsc_khz = r; + sync_global_to_guest(vm, guest_tsc_khz); +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + vcpu_regs_get(vcpu, ®s); + regs.rip = (unsigned long) guest_code; + vcpu_regs_set(vcpu, ®s); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { struct kvm_mp_state mp_state; struct kvm_regs regs; @@ -591,19 +723,26 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); - vcpu_setup(vm, vcpu); + vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); regs.rflags = regs.rflags | 0x2; regs.rsp = stack_vaddr; - regs.rip = (unsigned long) guest_code; vcpu_regs_set(vcpu, ®s); /* Setup the MP state */ mp_state.mp_state = 0; vcpu_mp_state_set(vcpu, &mp_state); + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. + */ + vcpu_get_cpuid(vcpu); return vcpu; } @@ -752,12 +891,21 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) vcpu_set_cpuid(vcpu); } -void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr) +void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_property property, + uint32_t value) { - struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008); + struct kvm_cpuid_entry2 *entry; + + entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index); + + (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit); + (&entry->eax)[property.reg] |= value << property.lo_bit; - entry->eax = (entry->eax & ~0xff) | maxphyaddr; vcpu_set_cpuid(vcpu); + + /* Sanity check that @value doesn't exceed the bounds in any way. */ + TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value); } void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) @@ -825,7 +973,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) struct kvm_regs regs; TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); @@ -1041,105 +1189,14 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) } } -static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, - int dpl, unsigned short selector) -{ - struct idt_entry *base = - (struct idt_entry *)addr_gva2hva(vm, vm->idt); - struct idt_entry *e = &base[vector]; - - memset(e, 0, sizeof(*e)); - e->offset0 = addr; - e->selector = selector; - e->ist = 0; - e->type = 14; - e->dpl = dpl; - e->p = 1; - e->offset1 = addr >> 16; - e->offset2 = addr >> 32; -} - - -static bool kvm_fixup_exception(struct ex_regs *regs) -{ - if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) - return false; - - if (regs->vector == DE_VECTOR) - return false; - - regs->rip = regs->r11; - regs->r9 = regs->vector; - regs->r10 = regs->error_code; - return true; -} - -void kvm_exit_unexpected_vector(uint32_t value) -{ - ucall(UCALL_UNHANDLED, 1, value); -} - -void route_exception(struct ex_regs *regs) -{ - typedef void(*handler)(struct ex_regs *); - handler *handlers = (handler *)exception_handlers; - - if (handlers && handlers[regs->vector]) { - handlers[regs->vector](regs); - return; - } - - if (kvm_fixup_exception(regs)) - return; - - kvm_exit_unexpected_vector(regs->vector); -} - -void vm_init_descriptor_tables(struct kvm_vm *vm) -{ - extern void *idt_handlers; - int i; - - vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - /* Handlers have the same address in both address spaces.*/ - for (i = 0; i < NUM_INTERRUPTS; i++) - set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, - DEFAULT_CODE_SELECTOR); -} - -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) -{ - struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; - - vcpu_sregs_get(vcpu, &sregs); - sregs.idt.base = vm->idt; - sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; - sregs.gdt.base = vm->gdt; - sregs.gdt.limit = getpagesize() - 1; - kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); - vcpu_sregs_set(vcpu, &sregs); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; -} - -void vm_install_exception_handler(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) -{ - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); - - handlers[vector] = (vm_vaddr_t)handler; -} - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +void kvm_init_vm_address_properties(struct kvm_vm *vm) { - struct ucall uc; - - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { - uint64_t vector = uc.args[0]; - - TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", - vector); + if (is_sev_vm(vm)) { + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); + vm->gpa_tag_mask = vm->arch.c_bit; + } else { + vm->arch.sev_fd = -1; } } @@ -1191,72 +1248,24 @@ void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - close(kvm_fd); - return cpuid; -} - -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 *cpuid_full; - const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; - int i, nent = 0; - - if (!cpuid_full) { - cpuid_sys = kvm_get_supported_cpuid(); - cpuid_hv = kvm_get_supported_hv_cpuid(); - - cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); - if (!cpuid_full) { - perror("malloc"); - abort(); - } - - /* Need to skip KVM CPUID leaves 0x400000xx */ - for (i = 0; i < cpuid_sys->nent; i++) { - if (cpuid_sys->entries[i].function >= 0x40000000 && - cpuid_sys->entries[i].function < 0x40000100) - continue; - cpuid_full->entries[nent] = cpuid_sys->entries[i]; - nent++; - } - - memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, - cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); - cpuid_full->nent = nent + cpuid_hv->nent; - } - - vcpu_init_cpuid(vcpu, cpuid_full); -} - -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - - vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; -} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr; + uint8_t maxphyaddr, guest_maxphyaddr; + + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. + */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); - max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ if (!host_cpu_is_amd) @@ -1290,18 +1299,27 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) +void kvm_selftest_arch_init(void) { - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); + host_cpu_is_intel = this_cpu_is_intel(); + host_cpu_is_amd = this_cpu_is_amd(); + is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); - return get_kvm_intel_param_bool("unrestricted_guest"); + kvm_init_pmu_errata(); } -void kvm_selftest_arch_init(void) +bool sys_clocksource_is_based_on_tsc(void) { - host_cpu_is_intel = this_cpu_is_intel(); - host_cpu_is_amd = this_cpu_is_amd(); + char *clk_name = sys_get_cur_clocksource(); + bool ret = !strcmp(clk_name, "tsc\n") || + !strcmp(clk_name, "hyperv_clocksource_tsc_page\n"); + + free(clk_name); + + return ret; +} + +bool kvm_arch_has_default_irqchip(void) +{ + return true; } diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c new file mode 100644 index 000000000000..c3a9838f4806 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdint.h> +#include <stdbool.h> + +#include "sev.h" + +/* + * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the + * -1 would then cause an underflow back to 2**64 - 1. This is expected and + * correct. + * + * If the last range in the sparsebit is [x, y] and we try to iterate, + * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try + * and find the first range, but that's correct because the condition + * expression would cause us to quit the loop. + */ +static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region, + uint8_t page_type, bool private) +{ + const struct sparsebit *protected_phy_pages = region->protected_phy_pages; + const vm_paddr_t gpa_base = region->region.guest_phys_addr; + const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift; + sparsebit_idx_t i, j; + + if (!sparsebit_any_set(protected_phy_pages)) + return; + + if (!is_sev_snp_vm(vm)) + sev_register_encrypted_memory(vm, region); + + sparsebit_for_each_set_range(protected_phy_pages, i, j) { + const uint64_t size = (j - i + 1) * vm->page_size; + const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; + + if (private) + vm_mem_set_private(vm, gpa_base + offset, size); + + if (is_sev_snp_vm(vm)) + snp_launch_update_data(vm, gpa_base + offset, + (uint64_t)addr_gpa2hva(vm, gpa_base + offset), + size, page_type); + else + sev_launch_update_data(vm, gpa_base + offset, size); + + } +} + +void sev_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void sev_es_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void snp_vm_init(struct kvm_vm *vm) +{ + struct kvm_sev_init init = { 0 }; + + TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); +} + +void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) +{ + struct kvm_sev_launch_start launch_start = { + .policy = policy, + }; + struct userspace_mem_region *region; + struct kvm_sev_guest_status status; + int ctr; + + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start); + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); + + TEST_ASSERT_EQ(status.policy, policy); + TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false); + + if (policy & SEV_POLICY_ES) + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + + vm->arch.is_pt_protected = true; +} + +void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement) +{ + struct kvm_sev_launch_measure launch_measure; + struct kvm_sev_guest_status guest_status; + + launch_measure.len = 256; + launch_measure.uaddr = (__u64)measurement; + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure); + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status); + TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET); +} + +void sev_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_guest_status status; + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); + TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE || + status.state == SEV_GUEST_STATE_LAUNCH_SECRET, + "Unexpected guest state: %d", status.state); + + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL); + + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); + TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); +} + +void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy) +{ + struct kvm_sev_snp_launch_start launch_start = { + .policy = policy, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start); +} + +void snp_vm_launch_update(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + int ctr; + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true); + + vm->arch.is_pt_protected = true; +} + +void snp_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_snp_launch_finish launch_finish = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish); +} + +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, + struct kvm_vcpu **cpu) +{ + struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = type, + }; + struct kvm_vm *vm; + struct kvm_vcpu *cpus[1]; + + vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus); + *cpu = cpus[0]; + + return vm; +} + +void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement) +{ + if (is_sev_snp_vm(vm)) { + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); + + snp_vm_launch_start(vm, policy); + + snp_vm_launch_update(vm); + + snp_vm_launch_finish(vm); + + return; + } + + sev_vm_launch(vm, policy); + + if (!measurement) + measurement = alloca(256); + + sev_vm_launch_measure(vm, measurement); + + sev_vm_launch_finish(vm); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index 5495a92dfd5a..d239c2097391 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/svm.c * Helpers used for nested SVM testing * Largely inspired from KVM unit test svm.c * diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c new file mode 100644 index 000000000000..1265cecc7dd1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + /* + * FIXME: Revert this hack (the entire commit that added it) once nVMX + * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g. + * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be + * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP + * in particular is problematic) along with RDX and RDI (which are + * inputs), and clobber volatile GPRs. *sigh* + */ +#define HORRIFIC_L2_UCALL_CLOBBER_HACK \ + "rcx", "rsi", "r8", "r9", "r10", "r11" + + asm volatile("push %%rbp\n\t" + "push %%r15\n\t" + "push %%r14\n\t" + "push %%r13\n\t" + "push %%r12\n\t" + "push %%rbx\n\t" + "push %%rdx\n\t" + "push %%rdi\n\t" + "in %[port], %%al\n\t" + "pop %%rdi\n\t" + "pop %%rdx\n\t" + "pop %%rbx\n\t" + "pop %%r12\n\t" + "pop %%r13\n\t" + "pop %%r14\n\t" + "pop %%r15\n\t" + "pop %%rbp\n\t" + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory", + HORRIFIC_L2_UCALL_CLOBBER_HACK); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { + struct kvm_regs regs; + + vcpu_regs_get(vcpu, ®s); + return (void *)regs.rdi; + } + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 59d97531c9b1..29b082a58daa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/vmx.c - * * Copyright (C) 2018, Google LLC. */ @@ -54,7 +52,7 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) /* KVM should return supported EVMCS version range */ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x\n", + "Incorrect EVMCS version range: %x:%x", evmcs_ver & 0xff, evmcs_ver >> 8); return evmcs_ver; @@ -200,7 +198,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) if (vmx->eptp_gpa) { uint64_t ept_paddr; struct eptPageTablePointer eptp = { - .memory_type = VMX_BASIC_MEM_TYPE_WB, + .memory_type = X86_MEMTYPE_WB, .page_walk_length = 3, /* + 1 */ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, @@ -387,10 +385,10 @@ static void nested_create_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n", + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx\n", + "Cannot create page table at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); } } @@ -403,11 +401,11 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; uint16_t index; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", + "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", nested_paddr); TEST_ASSERT((nested_paddr % page_size) == 0, "Nested physical address not on page boundary,\n" @@ -536,8 +534,7 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm) { TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c deleted file mode 100644 index efb7e7a1354d..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Hyper-V specific functions. - * - * Copyright (C) 2021, Red Hat Inc. - */ -#include <stdint.h> -#include "processor.h" -#include "hyperv.h" - -struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva) -{ - vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); - struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); - - /* Setup of a region of guest memory for the VP Assist page. */ - hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); - hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); - hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); - - /* Setup of a region of guest memory for the partition assist page. */ - hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); - hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); - hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); - - /* Setup of a region of guest memory for the enlightened VMCS. */ - hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); - hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); - hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); - - *p_hv_pages_gva = hv_pages_gva; - return hv; -} - -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) -{ - uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); - - current_vp_assist = vp_assist; - - return 0; -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c deleted file mode 100644 index 4d41dc63cc9e..000000000000 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucall support. A ucall is a "hypercall to userspace". - * - * Copyright (C) 2018, Red Hat, Inc. - */ -#include "kvm_util.h" - -#define UCALL_PIO_PORT ((uint16_t)0x1000) - -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); -} - -void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) -{ - struct kvm_run *run = vcpu->run; - - if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { - struct kvm_regs regs; - - vcpu_regs_get(vcpu, ®s); - return (void *)regs.rdi; - } - return NULL; -} diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c new file mode 100644 index 000000000000..355ecac30954 --- /dev/null +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The test validates periodic/one-shot constant timer IRQ using + * CSR.TCFG and CSR.TVAL registers. + */ +#include "arch_timer.h" +#include "kvm_util.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" + +static void do_idle(void) +{ + unsigned int intid; + unsigned long estat; + + __asm__ __volatile__("idle 0" : : : "memory"); + + estat = csr_read(LOONGARCH_CSR_ESTAT); + intid = !!(estat & BIT(INT_TI)); + + /* Make sure pending timer IRQ arrived */ + GUEST_ASSERT_EQ(intid, 1); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid; + uint32_t cpu = guest_get_vcpuid(); + uint64_t xcnt, val, cfg, xcnt_diff_us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + intid = !!(regs->estat & BIT(INT_TI)); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, 1); + + cfg = timer_get_cfg(); + if (cfg & CSR_TCFG_PERIOD) { + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1); + if (shared_data->nr_iter == 0) + disable_timer(); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + + /* + * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1 + * On virtual machine, its value counts down from BIT_ULL(48) - 1 + */ + val = timer_get_val(); + xcnt = timer_get_cycles(); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(val > cfg, + "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx", + val, cfg, xcnt_diff_us); + + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_test_period_timer(uint32_t cpu) +{ + uint32_t irq_iter, config_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = test_args.nr_iter; + shared_data->xcnt = timer_get_cycles(); + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + timer_set_next_cmp_ms(test_args.timer_period_ms, true); + + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + } + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(irq_iter == 0, + "irq_iter = 0x%x.\n" + " Guest period timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + irq_iter); +} + +static void guest_test_oneshot_timer(uint32_t cpu) +{ + uint32_t irq_iter, config_iter; + uint64_t us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = 0; + shared_data->guest_stage = 0; + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_test_emulate_timer(uint32_t cpu) +{ + uint32_t config_iter; + uint64_t xcnt_diff_us, us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + local_irq_disable(); + shared_data->nr_iter = 0; + us = msecs_to_usecs(test_args.timer_period_ms); + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + do_idle(); + + xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt); + __GUEST_ASSERT(xcnt_diff_us >= us, + "xcnt_diff_us = 0x%lx, us = 0x%lx.\n", + xcnt_diff_us, us); + } + local_irq_enable(); +} + +static void guest_time_count_test(uint32_t cpu) +{ + uint32_t config_iter; + unsigned long start, end, prev, us; + + /* Assuming that test case starts to run in 1 second */ + start = timer_get_cycles(); + us = msec_to_cycles(1000); + __GUEST_ASSERT(start <= us, + "start = 0x%lx, us = 0x%lx.\n", + start, us); + + us = msec_to_cycles(test_args.timer_period_ms); + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + start = timer_get_cycles(); + end = start + us; + /* test time count growing up always */ + while (start < end) { + prev = start; + start = timer_get_cycles(); + __GUEST_ASSERT(prev <= start, + "prev = 0x%lx, start = 0x%lx.\n", + prev, start); + } + } +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + + /* must run at first */ + guest_time_count_test(cpu); + + timer_irq_enable(); + local_irq_enable(); + guest_test_period_timer(cpu); + guest_test_oneshot_timer(cpu); + guest_test_emulate_timer(cpu); + + GUEST_DONE(); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 9855c41ca811..3cdfa3b19b85 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -6,9 +6,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2020, Google, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <sys/syscall.h> @@ -25,6 +22,7 @@ #include "processor.h" #include "test_util.h" #include "guest_modes.h" +#include "ucall_common.h" #define DUMMY_MEMSLOT_INDEX 7 @@ -45,7 +43,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory until a stop signal is received */ while (!READ_ONCE(memstress_args.stop_vcpus)) { ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) == UCALL_SYNC) continue; @@ -56,12 +54,6 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } } -struct memslot_antagonist_args { - struct kvm_vm *vm; - useconds_t delay; - uint64_t nr_modifications; -}; - static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, uint64_t nr_modifications) { @@ -88,6 +80,7 @@ struct test_params { useconds_t delay; uint64_t nr_iterations; bool partition_vcpu_memory_access; + bool disable_slot_zap_quirk; }; static void run_test(enum vm_guest_mode mode, void *arg) @@ -98,6 +91,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); +#ifdef __x86_64__ + if (p->disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + + pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ? + "disabled" : "enabled"); +#endif pr_info("Finished creating vCPUs\n"); @@ -116,11 +116,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m mode] [-d delay_usec]\n" + printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n" " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name); guest_modes_help(); printf(" -d: add a delay between each iteration of adding and\n" " deleting a memslot in usec.\n"); + printf(" -q: Disable memslot zap quirk.\n"); printf(" -b: specify the size of the memory region which should be\n" " accessed by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -146,7 +147,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) { + while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -169,6 +170,14 @@ int main(int argc, char *argv[]) case 'i': p.nr_iterations = atoi_positive("Number of iterations", optarg); break; +#ifdef __x86_64__ + case 'q': + p.disable_slot_zap_quirk = true; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; +#endif case 'h': default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 4210cd21d159..5087d082c4b0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -25,6 +25,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <ucall_common.h> #define MEM_EXTRA_SIZE SZ_64K @@ -113,6 +114,9 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); static sem_t vcpu_ready; static bool map_unmap_verify; +#ifdef __x86_64__ +static bool disable_slot_zap_quirk; +#endif static bool verbose; #define pr_info_v(...) \ @@ -157,7 +161,7 @@ static void *vcpu_worker(void *__data) goto done; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_1(uc, "val = %lu"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; @@ -175,11 +179,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); } static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) @@ -336,7 +340,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, - "vm_phy_pages_alloc() failed\n"); + "vm_phy_pages_alloc() failed"); data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); @@ -414,7 +418,7 @@ static bool _guest_should_exit(void) */ static noinline void host_perform_sync(struct sync_area *sync) { - alarm(2); + alarm(10); atomic_store_explicit(&sync->sync_flag, true, memory_order_release); while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) @@ -560,7 +564,7 @@ static void guest_code_test_memslot_rw(void) ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { uint64_t val = *(uint64_t *)ptr; - GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); + GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2); *(uint64_t *)ptr = 0; } @@ -578,6 +582,11 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; +#ifdef __x86_64__ + if (disable_slot_zap_quirk) + vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); +#endif + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { @@ -896,6 +905,7 @@ static void help(char *name, struct test_args *targs) pr_info(" -h: print this help screen.\n"); pr_info(" -v: enable verbose mode (not for benchmarking).\n"); pr_info(" -d: enable extra debug checks.\n"); + pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", targs->nslots); pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", @@ -954,7 +964,7 @@ static bool parse_args(int argc, char *argv[], uint32_t max_mem_slots; int opt; - while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { + while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { switch (opt) { case 'h': default: @@ -966,6 +976,13 @@ static bool parse_args(int argc, char *argv[], case 'd': map_unmap_verify = true; break; +#ifdef __x86_64__ + case 'q': + disable_slot_zap_quirk = true; + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; +#endif case 's': targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 1 && targs->nslots != -1) { @@ -1033,9 +1050,8 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestruntime) { uint64_t maxslots; - struct test_result result; + struct test_result result = {}; - result.nloops = 0; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, &result.nloops, &result.slot_runtime, &result.guest_runtime)) { @@ -1089,7 +1105,7 @@ int main(int argc, char *argv[]) .seconds = 5, .runs = 1, }; - struct test_result rbestslottime; + struct test_result rbestslottime = {}; int tctr; if (!check_memory_sizes()) @@ -1098,11 +1114,10 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv, &targs)) return -1; - rbestslottime.slottimens = 0; for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { const struct test_data *data = &tests[tctr]; unsigned int runctr; - struct test_result rbestruntime; + struct test_result rbestruntime = {}; if (tctr > targs.tfirst) pr_info("\n"); @@ -1110,7 +1125,6 @@ int main(int argc, char *argv[]) pr_info("Testing %s performance with %i runs, %d seconds each\n", data->name, targs.runs, targs.seconds); - rbestruntime.runtimens = 0; for (runctr = 0; runctr < targs.runs; runctr++) if (!test_loop(data, &targs, &rbestslottime, &rbestruntime)) diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index feaf2be20ff2..51c070556f3e 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - #include <stdio.h> #include <stdlib.h> #include <pthread.h> @@ -17,15 +15,63 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +static bool mprotect_ro_done; +static bool all_vcpus_hit_ro_fault; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { uint64_t gpa; + int i; + + for (i = 0; i < 2; i++) { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(i); + } for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa) = gpa; + *((volatile uint64_t *)gpa); + GUEST_SYNC(2); - GUEST_DONE(); + /* + * Write to the region while mprotect(PROT_READ) is underway. Keep + * looping until the memory is guaranteed to be read-only and a fault + * has occurred, otherwise vCPUs may complete their writes and advance + * to the next stage prematurely. + * + * For architectures that support skipping the faulting instruction, + * generate the store via inline assembly to ensure the exact length + * of the instruction is known and stable (vcpu_arch_put_guest() on + * fixed-length architectures should work, but the cost of paranoia + * is low in this case). For x86, hand-code the exact opcode so that + * there is no room for variability in the generated instruction. + */ + do { + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) +#ifdef __x86_64__ + asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */ +#elif defined(__aarch64__) + asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); +#else + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); +#endif + } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); + + /* + * Only architectures that write the entire range can explicitly sync, + * as other architectures will be stuck on the write fault. + */ +#if defined(__x86_64__) || defined(__aarch64__) + GUEST_SYNC(3); +#endif + + for (gpa = start_gpa; gpa < end_gpa; gpa += stride) + vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + GUEST_SYNC(4); + + GUEST_ASSERT(0); } struct vcpu_info { @@ -36,6 +82,7 @@ struct vcpu_info { static int nr_vcpus; static atomic_t rendezvous; +static atomic_t nr_ro_faults; static void rendezvous_with_boss(void) { @@ -52,38 +99,104 @@ static void rendezvous_with_boss(void) } } -static void run_vcpu(struct kvm_vcpu *vcpu) +static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage) +{ + struct ucall uc; + + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], stage); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + assert_sync_stage(vcpu, stage); } static void *vcpu_worker(void *data) { + struct kvm_sregs __maybe_unused sregs; struct vcpu_info *info = data; struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; - struct kvm_regs regs; + int r; vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); - /* Snapshot regs before the first run. */ - vcpu_regs_get(vcpu, ®s); rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 0, write all of guest memory. */ + run_vcpu(vcpu, 0); rendezvous_with_boss(); - vcpu_regs_set(vcpu, ®s); - vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ + vcpu_sregs_get(vcpu, &sregs); /* Toggle CR0.WP to trigger a MMU context reset. */ sregs.cr0 ^= X86_CR0_WP; -#endif vcpu_sregs_set(vcpu, &sregs); +#endif rendezvous_with_boss(); - run_vcpu(vcpu); + /* Stage 1, re-write all of guest memory. */ + run_vcpu(vcpu, 1); + rendezvous_with_boss(); + + /* Stage 2, read all of guest memory, which is now read-only. */ + run_vcpu(vcpu, 2); + + /* + * Stage 3, write guest memory and verify KVM returns -EFAULT for once + * the mprotect(PROT_READ) lands. Only architectures that support + * validating *all* of guest memory sync for this stage, as vCPUs will + * be stuck on the faulting instruction for other architectures. Go to + * stage 3 without a rendezvous + */ + r = _vcpu_run(vcpu); + TEST_ASSERT(r == -1 && errno == EFAULT, + "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + + atomic_inc(&nr_ro_faults); + if (atomic_read(&nr_ro_faults) == nr_vcpus) { + WRITE_ONCE(all_vcpus_hit_ro_fault, true); + sync_global_to_guest(vm, all_vcpus_hit_ro_fault); + } + +#if defined(__x86_64__) || defined(__aarch64__) + /* + * Verify *all* writes from the guest hit EFAULT due to the VMA now + * being read-only. x86 and arm64 only at this time as skipping the + * instruction that hits the EFAULT requires advancing the program + * counter, which is arch specific and relies on inline assembly. + */ +#ifdef __x86_64__ + vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS; +#endif + for (;;) { + r = _vcpu_run(vcpu); + if (!r) + break; + TEST_ASSERT_EQ(errno, EFAULT); +#if defined(__x86_64__) + WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS); + vcpu->run->s.regs.regs.rip += 3; +#elif defined(__aarch64__) + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4); +#endif + + } + assert_sync_stage(vcpu, 3); +#endif /* __x86_64__ || __aarch64__ */ + rendezvous_with_boss(); + + /* + * Stage 4. Run to completion, waiting for mprotect(PROT_WRITE) to + * make the memory writable again. + */ + do { + r = _vcpu_run(vcpu); + } while (r && errno == EFAULT); + TEST_ASSERT_EQ(r, 0); + assert_sync_stage(vcpu, 4); rendezvous_with_boss(); return NULL; @@ -150,8 +263,10 @@ static void calc_default_nr_vcpus(void) TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); - nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; + nr_vcpus = CPU_COUNT(&possible_mask); TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); + if (nr_vcpus >= 2) + nr_vcpus = nr_vcpus * 3/4; } int main(int argc, char *argv[]) @@ -166,7 +281,7 @@ int main(int argc, char *argv[]) const uint64_t start_gpa = SZ_4G; const int first_slot = 1; - struct timespec time_start, time_run1, time_reset, time_run2; + struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; @@ -214,14 +329,19 @@ int main(int argc, char *argv[]) vcpus = malloc(nr_vcpus * sizeof(*vcpus)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); - vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, +#ifdef __x86_64__ + max_mem / SZ_1G, +#else + max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size, +#endif + guest_code, vcpus); max_gpa = vm->max_gfn << vm->page_shift; TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); @@ -242,11 +362,9 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += SZ_1G) - __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); + virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G); #else - for (i = 0; i < slot_size; i += vm->page_size) - virt_pg_map(vm, gpa + i, gpa + i); + virt_map(vm, gpa, gpa, slot_size >> vm->page_shift); #endif } @@ -264,14 +382,27 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_reset, "reset"); rendezvous_with_vcpus(&time_run2, "run 2"); + mprotect(mem, slot_size, PROT_READ); + mprotect_ro_done = true; + sync_global_to_guest(vm, mprotect_ro_done); + + rendezvous_with_vcpus(&time_ro, "mprotect RO"); + mprotect(mem, slot_size, PROT_READ | PROT_WRITE); + rendezvous_with_vcpus(&time_rw, "mprotect RW"); + + time_rw = timespec_sub(time_rw, time_ro); + time_ro = timespec_sub(time_ro, time_run2); time_run2 = timespec_sub(time_run2, time_reset); - time_reset = timespec_sub(time_reset, time_run1); + time_reset = timespec_sub(time_reset, time_run1); time_run1 = timespec_sub(time_run1, time_start); - pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n", + pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, " + "ro = %ld.%.9lds, rw = %ld.%.9lds\n", time_run1.tv_sec, time_run1.tv_nsec, time_reset.tv_sec, time_reset.tv_nsec, - time_run2.tv_sec, time_run2.tv_nsec); + time_run2.tv_sec, time_run2.tv_nsec, + time_ro.tv_sec, time_ro.tv_nsec, + time_rw.tv_sec, time_rw.tv_nsec); /* * Delete even numbered slots (arbitrary) and unmap the first half of @@ -281,7 +412,7 @@ int main(int argc, char *argv[]) for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - munmap(mem, slot_size / 2); + kvm_munmap(mem, slot_size / 2); /* Sanity check that the vCPUs actually ran. */ for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c new file mode 100644 index 000000000000..93e603d91311 --- /dev/null +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Intel, Inc + * + * Author: + * Isaku Yamahata <isaku.yamahata at gmail.com> + */ +#include <linux/sizes.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> +#include <pthread.h> + +/* Arbitrarily chosen values */ +#define TEST_SIZE (SZ_2M + PAGE_SIZE) +#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) +#define TEST_SLOT 10 + +static void guest_code(uint64_t base_gva) +{ + volatile uint64_t val __used; + int i; + + for (i = 0; i < TEST_NPAGES; i++) { + uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE); + + val = *src; + } + + GUEST_DONE(); +} + +struct slot_worker_data { + struct kvm_vm *vm; + u64 gpa; + uint32_t flags; + bool worker_ready; + bool prefault_ready; + bool recreate_slot; +}; + +static void *delete_slot_worker(void *__data) +{ + struct slot_worker_data *data = __data; + struct kvm_vm *vm = data->vm; + + WRITE_ONCE(data->worker_ready, true); + + while (!READ_ONCE(data->prefault_ready)) + cpu_relax(); + + vm_mem_region_delete(vm, TEST_SLOT); + + while (!READ_ONCE(data->recreate_slot)) + cpu_relax(); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, + TEST_SLOT, TEST_NPAGES, data->flags); + + return NULL; +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, + u64 size, u64 expected_left, bool private) +{ + struct kvm_pre_fault_memory range = { + .gpa = base_gpa + offset, + .size = size, + .flags = 0, + }; + struct slot_worker_data data = { + .vm = vcpu->vm, + .gpa = base_gpa, + .flags = private ? KVM_MEM_GUEST_MEMFD : 0, + }; + bool slot_recreated = false; + pthread_t slot_worker; + int ret, save_errno; + u64 prev; + + /* + * Concurrently delete (and recreate) the slot to test KVM's handling + * of a racing memslot deletion with prefaulting. + */ + pthread_create(&slot_worker, NULL, delete_slot_worker, &data); + + while (!READ_ONCE(data.worker_ready)) + cpu_relax(); + + WRITE_ONCE(data.prefault_ready, true); + + for (;;) { + prev = range.size; + ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); + save_errno = errno; + TEST_ASSERT((range.size < prev) ^ (ret < 0), + "%sexpecting range.size to change on %s", + ret < 0 ? "not " : "", + ret < 0 ? "failure" : "success"); + + /* + * Immediately retry prefaulting if KVM was interrupted by an + * unrelated signal/event. + */ + if (ret < 0 && save_errno == EINTR) + continue; + + /* + * Tell the worker to recreate the slot in order to complete + * prefaulting (if prefault didn't already succeed before the + * slot was deleted) and/or to prepare for the next testcase. + * Wait for the worker to exit so that the next invocation of + * prefaulting is guaranteed to complete (assuming no KVM bugs). + */ + if (!slot_recreated) { + WRITE_ONCE(data.recreate_slot, true); + pthread_join(slot_worker, NULL); + slot_recreated = true; + + /* + * Retry prefaulting to get a stable result, i.e. to + * avoid seeing random EAGAIN failures. Don't retry if + * prefaulting already succeeded, as KVM disallows + * prefaulting with size=0, i.e. blindly retrying would + * result in test failures due to EINVAL. KVM should + * always return success if all bytes are prefaulted, + * i.e. there is no need to guard against EAGAIN being + * returned. + */ + if (range.size) + continue; + } + + /* + * All done if there are no remaining bytes to prefault, or if + * prefaulting failed (EINTR was handled above, and EAGAIN due + * to prefaulting a memslot that's being actively deleted should + * be impossible since the memslot has already been recreated). + */ + if (!range.size || ret < 0) + break; + } + + TEST_ASSERT(range.size == expected_left, + "Completed with %llu bytes left, expected %lu", + range.size, expected_left); + + /* + * Assert success if prefaulting the entire range should succeed, i.e. + * complete with no bytes remaining. Otherwise prefaulting should have + * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when + * no memslot exists). + */ + if (!expected_left) + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); + else + TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); +} + +static void __test_pre_fault_memory(unsigned long vm_type, bool private) +{ + uint64_t gpa, gva, alignment, guest_page_size; + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = vm_type, + }; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); + + alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size; +#ifdef __s390x__ + alignment = max(0x100000UL, guest_page_size); +#else + alignment = SZ_2M; +#endif + gpa = align_down(gpa, alignment); + gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT, + TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0); + virt_map(vm, gva, gpa, TEST_NPAGES); + + if (private) + vm_mem_set_private(vm, gpa, TEST_SIZE); + + pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); + + vcpu_args_set(vcpu, 1, gva); + vcpu_run(vcpu); + + run = vcpu->run; + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + break; + } + + kvm_vm_free(vm); +} + +static void test_pre_fault_memory(unsigned long vm_type, bool private) +{ + if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) { + pr_info("Skipping tests for vm_type 0x%lx\n", vm_type); + return; + } + + __test_pre_fault_memory(vm_type, private); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY)); + + test_pre_fault_memory(0, false); +#ifdef __x86_64__ + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false); + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true); +#endif + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c new file mode 100644 index 000000000000..f962fefc48fa --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality + * + * The test validates the sstc timer IRQs using vstimecmp registers. + * It's ported from the aarch64 arch_timer test. + * + * Copyright (c) 2024, Intel Corporation. + */ +#include "arch_timer.h" +#include "kvm_util.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" + +static int timer_irq = IRQ_S_TIMER; + +static void guest_irq_handler(struct pt_regs *regs) +{ + uint64_t xcnt, xcnt_diff_us, cmp; + unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + timer_irq_disable(); + + xcnt = timer_get_cycles(); + cmp = timer_get_cmp(); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, timer_irq); + + __GUEST_ASSERT(xcnt >= cmp, + "xcnt = 0x%"PRIx64", cmp = 0x%"PRIx64", xcnt_diff_us = 0x%" PRIx64, + xcnt, cmp, xcnt_diff_us); + + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_run(struct test_vcpu_shared_data *shared_data) +{ + uint32_t irq_iter, config_iter; + + shared_data->nr_iter = 0; + shared_data->guest_stage = 0; + + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms); + shared_data->xcnt = timer_get_cycles(); + timer_irq_enable(); + + /* Setup a timeout for the interrupt to arrive */ + udelay(msecs_to_usecs(test_args.timer_period_ms) + + test_args.timer_err_margin_us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_code(void) +{ + uint32_t cpu = guest_get_vcpuid(); + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + timer_irq_disable(); + local_irq_enable(); + + guest_run(shared_data); + + GUEST_DONE(); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC), + "SSTC not available, skipping test\n"); + + vm_init_vector_tables(vm); + vm_install_interrupt_handler(vm, guest_irq_handler); + + for (int i = 0; i < nr_vcpus; i++) + vcpu_init_vector_tables(vcpus[i]); + + /* Initialize guest timer frequency. */ + timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency)); + sync_global_to_guest(vm, timer_freq); + pr_debug("timer_freq: %lu\n", timer_freq); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c new file mode 100644 index 000000000000..739d17befb5a --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V KVM ebreak test. + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#include "kvm_util.h" +#include "ucall_common.h" + +#define LABEL_ADDRESS(v) ((uint64_t)&(v)) + +extern unsigned char sw_bp_1, sw_bp_2; +static uint64_t sw_bp_addr; + +static void guest_code(void) +{ + asm volatile( + ".option push\n" + ".option norvc\n" + "sw_bp_1: ebreak\n" + "sw_bp_2: ebreak\n" + ".option pop\n" + ); + GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2)); + + GUEST_DONE(); +} + +static void guest_breakpoint_handler(struct pt_regs *regs) +{ + WRITE_ONCE(sw_bp_addr, regs->epc); + regs->epc += 4; +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint64_t pc; + struct kvm_guest_debug debug = { + .control = KVM_GUESTDBG_ENABLE, + }; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_vector_tables(vm); + vcpu_init_vector_tables(vcpu); + vm_install_exception_handler(vm, EXC_BREAKPOINT, + guest_breakpoint_handler); + + /* + * Enable the guest debug. + * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason. + */ + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + + pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); + TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); + + /* skip sw_bp_1 */ + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4); + + /* + * Disable all debug controls. + * Guest should handle the ebreak without exiting to the VMM. + */ + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c new file mode 100644 index 000000000000..cb54a56990a0 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -0,0 +1,1302 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (c) 2023 Intel Corporation + * + */ +#include <stdio.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK) + +enum { + VCPU_FEATURE_ISA_EXT = 0, + VCPU_FEATURE_SBI_EXT, +}; + +enum { + KVM_RISC_V_REG_OFFSET_VSTART = 0, + KVM_RISC_V_REG_OFFSET_VL, + KVM_RISC_V_REG_OFFSET_VTYPE, + KVM_RISC_V_REG_OFFSET_VCSR, + KVM_RISC_V_REG_OFFSET_VLENB, + KVM_RISC_V_REG_OFFSET_MAX, +}; + +static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; + +bool filter_reg(__u64 reg) +{ + switch (reg & ~REG_MASK) { + /* + * Same set of ISA_EXT registers are not present on all host because + * ISA_EXT registers are visible to the KVM user space based on the + * ISA extensions available on the host. Also, disabling an ISA + * extension using corresponding ISA_EXT register does not affect + * the visibility of the ISA_EXT register itself. + * + * Based on above, we should filter-out all ISA_EXT registers. + * + * Note: The below list is alphabetically sorted. + */ + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_A: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_C: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_H: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMNPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSNPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADU: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT: + /* + * Like ISA_EXT registers, SBI_EXT registers are only visible when the + * host supports them and disabling them does not affect the visibility + * of the SBI_EXT register itself. + */ + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: + return true; + /* AIA registers are always available when Ssaia can't be disabled */ + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA]; + default: + break; + } + + return false; +} + +bool check_reject_set(int err) +{ + return err == EINVAL; +} + +static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, + uint64_t feature) +{ + unsigned long vlenb_reg = 0; + int rc; + u64 reg, size; + + /* Enable V extension so that we can get the vlenb register */ + rc = __vcpu_set_reg(vcpu, feature, 1); + if (rc) + return rc; + + vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]); + if (!vlenb_reg) { + TEST_FAIL("Can't compute vector register size from zero vlenb\n"); + return -EPERM; + } + + size = __builtin_ctzl(vlenb_reg); + size <<= KVM_REG_SIZE_SHIFT; + + for (int i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i); + s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg; + } + + /* We should assert if disabling failed here while enabling succeeded before */ + vcpu_set_reg(vcpu, feature, 0); + + return 0; +} + +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; + struct vcpu_reg_sublist *s; + uint64_t feature; + int rc; + + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) + __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]); + + /* + * Disable all extensions which were enabled by default + * if they were available in the risc-v host. + */ + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + if (rc && isa_ext_state[i]) + isa_ext_cant_disable[i] = true; + } + + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { + rc = __vcpu_set_reg(vcpu, RISCV_SBI_EXT_REG(i), 0); + TEST_ASSERT(!rc || (rc == -1 && errno == ENOENT), "Unexpected error"); + } + + for_each_sublist(c, s) { + if (!s->feature) + continue; + + if (s->feature == KVM_RISCV_ISA_EXT_V) { + feature = RISCV_ISA_EXT_REG(s->feature); + rc = override_vector_reg_size(vcpu, s, feature); + if (rc) + goto skip; + } + + switch (s->feature_type) { + case VCPU_FEATURE_ISA_EXT: + feature = RISCV_ISA_EXT_REG(s->feature); + break; + case VCPU_FEATURE_SBI_EXT: + feature = RISCV_SBI_EXT_REG(s->feature); + break; + default: + TEST_FAIL("Unknown feature type"); + } + + /* Try to enable the desired extension */ + __vcpu_set_reg(vcpu, feature, 1); + +skip: + /* Double check whether the desired extension was enabled */ + __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature), + "%s not available, skipping tests", s->name); + } +} + +static const char *config_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_config */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG); + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG); + + switch (reg_off) { + case KVM_REG_RISCV_CONFIG_REG(isa): + return "KVM_REG_RISCV_CONFIG_REG(isa)"; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(mvendorid): + return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; + case KVM_REG_RISCV_CONFIG_REG(marchid): + return "KVM_REG_RISCV_CONFIG_REG(marchid)"; + case KVM_REG_RISCV_CONFIG_REG(mimpid): + return "KVM_REG_RISCV_CONFIG_REG(mimpid)"; + case KVM_REG_RISCV_CONFIG_REG(satp_mode): + return "KVM_REG_RISCV_CONFIG_REG(satp_mode)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + +static const char *core_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_core */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE); + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE); + + switch (reg_off) { + case KVM_REG_RISCV_CORE_REG(regs.pc): + return "KVM_REG_RISCV_CORE_REG(regs.pc)"; + case KVM_REG_RISCV_CORE_REG(regs.ra): + return "KVM_REG_RISCV_CORE_REG(regs.ra)"; + case KVM_REG_RISCV_CORE_REG(regs.sp): + return "KVM_REG_RISCV_CORE_REG(regs.sp)"; + case KVM_REG_RISCV_CORE_REG(regs.gp): + return "KVM_REG_RISCV_CORE_REG(regs.gp)"; + case KVM_REG_RISCV_CORE_REG(regs.tp): + return "KVM_REG_RISCV_CORE_REG(regs.tp)"; + case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.t0)); + case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.s0)); + case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.a0)); + case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2); + case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3); + case KVM_REG_RISCV_CORE_REG(mode): + return "KVM_REG_RISCV_CORE_REG(mode)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + +#define RISCV_CSR_GENERAL(csr) \ + "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")" +#define RISCV_CSR_AIA(csr) \ + "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")" +#define RISCV_CSR_SMSTATEEN(csr) \ + "KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_REG(" #csr ")" + +static const char *general_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_REG(sstatus): + return RISCV_CSR_GENERAL(sstatus); + case KVM_REG_RISCV_CSR_REG(sie): + return RISCV_CSR_GENERAL(sie); + case KVM_REG_RISCV_CSR_REG(stvec): + return RISCV_CSR_GENERAL(stvec); + case KVM_REG_RISCV_CSR_REG(sscratch): + return RISCV_CSR_GENERAL(sscratch); + case KVM_REG_RISCV_CSR_REG(sepc): + return RISCV_CSR_GENERAL(sepc); + case KVM_REG_RISCV_CSR_REG(scause): + return RISCV_CSR_GENERAL(scause); + case KVM_REG_RISCV_CSR_REG(stval): + return RISCV_CSR_GENERAL(stval); + case KVM_REG_RISCV_CSR_REG(sip): + return RISCV_CSR_GENERAL(sip); + case KVM_REG_RISCV_CSR_REG(satp): + return RISCV_CSR_GENERAL(satp); + case KVM_REG_RISCV_CSR_REG(scounteren): + return RISCV_CSR_GENERAL(scounteren); + case KVM_REG_RISCV_CSR_REG(senvcfg): + return RISCV_CSR_GENERAL(senvcfg); + } + + return strdup_printf("KVM_REG_RISCV_CSR_GENERAL | %lld /* UNKNOWN */", reg_off); +} + +static const char *aia_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_aia_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_AIA_REG(siselect): + return RISCV_CSR_AIA(siselect); + case KVM_REG_RISCV_CSR_AIA_REG(iprio1): + return RISCV_CSR_AIA(iprio1); + case KVM_REG_RISCV_CSR_AIA_REG(iprio2): + return RISCV_CSR_AIA(iprio2); + case KVM_REG_RISCV_CSR_AIA_REG(sieh): + return RISCV_CSR_AIA(sieh); + case KVM_REG_RISCV_CSR_AIA_REG(siph): + return RISCV_CSR_AIA(siph); + case KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + return RISCV_CSR_AIA(iprio1h); + case KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return RISCV_CSR_AIA(iprio2h); + } + + return strdup_printf("KVM_REG_RISCV_CSR_AIA | %lld /* UNKNOWN */", reg_off); +} + +static const char *smstateen_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_smstateen_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0): + return RISCV_CSR_SMSTATEEN(sstateen0); + } + + TEST_FAIL("Unknown smstateen csr reg: 0x%llx", reg_off); + return NULL; +} + +static const char *csr_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR); + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + return general_csr_id_to_str(reg_off); + case KVM_REG_RISCV_CSR_AIA: + return aia_csr_id_to_str(reg_off); + case KVM_REG_RISCV_CSR_SMSTATEEN: + return smstateen_csr_id_to_str(reg_off); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +static const char *timer_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_timer */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER); + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER); + + switch (reg_off) { + case KVM_REG_RISCV_TIMER_REG(frequency): + return "KVM_REG_RISCV_TIMER_REG(frequency)"; + case KVM_REG_RISCV_TIMER_REG(time): + return "KVM_REG_RISCV_TIMER_REG(time)"; + case KVM_REG_RISCV_TIMER_REG(compare): + return "KVM_REG_RISCV_TIMER_REG(compare)"; + case KVM_REG_RISCV_TIMER_REG(state): + return "KVM_REG_RISCV_TIMER_REG(state)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + +static const char *fp_f_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_f_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F); + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F); + + switch (reg_off) { + case KVM_REG_RISCV_FP_F_REG(f[0]) ... + KVM_REG_RISCV_FP_F_REG(f[31]): + return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off); + case KVM_REG_RISCV_FP_F_REG(fcsr): + return "KVM_REG_RISCV_FP_F_REG(fcsr)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + +static const char *fp_d_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_d_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D); + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D); + + switch (reg_off) { + case KVM_REG_RISCV_FP_D_REG(f[0]) ... + KVM_REG_RISCV_FP_D_REG(f[31]): + return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off); + case KVM_REG_RISCV_FP_D_REG(fcsr): + return "KVM_REG_RISCV_FP_D_REG(fcsr)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + +static const char *vector_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_v_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR); + int reg_index = 0; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR); + + if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0)) + reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0); + switch (reg_off) { + case KVM_REG_RISCV_VECTOR_REG(0) ... + KVM_REG_RISCV_VECTOR_REG(31): + return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index); + case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vl): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vtype): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + +#define KVM_ISA_EXT_ARR(ext) \ +[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext + +static const char *isa_ext_single_id_to_str(__u64 reg_off) +{ + static const char * const kvm_isa_ext_reg_name[] = { + KVM_ISA_EXT_ARR(A), + KVM_ISA_EXT_ARR(C), + KVM_ISA_EXT_ARR(D), + KVM_ISA_EXT_ARR(F), + KVM_ISA_EXT_ARR(H), + KVM_ISA_EXT_ARR(I), + KVM_ISA_EXT_ARR(M), + KVM_ISA_EXT_ARR(V), + KVM_ISA_EXT_ARR(SMNPM), + KVM_ISA_EXT_ARR(SMSTATEEN), + KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVNAPOT), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), + KVM_ISA_EXT_ARR(ZABHA), + KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), + KVM_ISA_EXT_ARR(ZAWRS), + KVM_ISA_EXT_ARR(ZBA), + KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBC), + KVM_ISA_EXT_ARR(ZBKB), + KVM_ISA_EXT_ARR(ZBKC), + KVM_ISA_EXT_ARR(ZBKX), + KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZCA), + KVM_ISA_EXT_ARR(ZCB), + KVM_ISA_EXT_ARR(ZCD), + KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCMOP), + KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), + KVM_ISA_EXT_ARR(ZFH), + KVM_ISA_EXT_ARR(ZFHMIN), + KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), + KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), + KVM_ISA_EXT_ARR(ZICNTR), + KVM_ISA_EXT_ARR(ZICOND), + KVM_ISA_EXT_ARR(ZICSR), + KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTNTL), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZIMOP), + KVM_ISA_EXT_ARR(ZKND), + KVM_ISA_EXT_ARR(ZKNE), + KVM_ISA_EXT_ARR(ZKNH), + KVM_ISA_EXT_ARR(ZKR), + KVM_ISA_EXT_ARR(ZKSED), + KVM_ISA_EXT_ARR(ZKSH), + KVM_ISA_EXT_ARR(ZKT), + KVM_ISA_EXT_ARR(ZTSO), + KVM_ISA_EXT_ARR(ZVBB), + KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), + KVM_ISA_EXT_ARR(ZVFH), + KVM_ISA_EXT_ARR(ZVFHMIN), + KVM_ISA_EXT_ARR(ZVKB), + KVM_ISA_EXT_ARR(ZVKG), + KVM_ISA_EXT_ARR(ZVKNED), + KVM_ISA_EXT_ARR(ZVKNHA), + KVM_ISA_EXT_ARR(ZVKNHB), + KVM_ISA_EXT_ARR(ZVKSED), + KVM_ISA_EXT_ARR(ZVKSH), + KVM_ISA_EXT_ARR(ZVKT), + }; + + if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) + return strdup_printf("KVM_REG_RISCV_ISA_SINGLE | %lld /* UNKNOWN */", reg_off); + + return kvm_isa_ext_reg_name[reg_off]; +} + +static const char *isa_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off) +{ + const char *unknown = ""; + + if (reg_off > KVM_REG_RISCV_ISA_MULTI_REG_LAST) + unknown = " /* UNKNOWN */"; + + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_MULTI_EN: + return strdup_printf("KVM_REG_RISCV_ISA_MULTI_EN | %lld%s", reg_off, unknown); + case KVM_REG_RISCV_ISA_MULTI_DIS: + return strdup_printf("KVM_REG_RISCV_ISA_MULTI_DIS | %lld%s", reg_off, unknown); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +static const char *isa_ext_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT); + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_SINGLE: + return isa_ext_single_id_to_str(reg_off); + case KVM_REG_RISCV_ISA_MULTI_EN: + case KVM_REG_RISCV_ISA_MULTI_DIS: + return isa_ext_multi_id_to_str(reg_subtype, reg_off); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +#define KVM_SBI_EXT_ARR(ext) \ +[ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext + +static const char *sbi_ext_single_id_to_str(__u64 reg_off) +{ + /* reg_off is KVM_RISCV_SBI_EXT_ID */ + static const char * const kvm_sbi_ext_reg_name[] = { + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_V01), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_TIME), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_IPI), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_RFENCE), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), + }; + + if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) + return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off); + + return kvm_sbi_ext_reg_name[reg_off]; +} + +static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off) +{ + const char *unknown = ""; + + if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) + unknown = " /* UNKNOWN */"; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_MULTI_EN: + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown); + case KVM_REG_RISCV_SBI_MULTI_DIS: + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +static const char *sbi_ext_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_EXT); + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_SINGLE: + return sbi_ext_single_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_MULTI_EN: + case KVM_REG_RISCV_SBI_MULTI_DIS: + return sbi_ext_multi_id_to_str(reg_subtype, reg_off); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +static const char *sbi_sta_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo)"; + case 1: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi)"; + } + return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off); +} + +static const char *sbi_fwft_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)"; + case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)"; + case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)"; + case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)"; + case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)"; + case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)"; + } + return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off); +} + +static const char *sbi_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_STATE); + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_STA: + return sbi_sta_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_FWFT: + return sbi_fwft_id_to_str(reg_off); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +void print_reg(const char *prefix, __u64 id) +{ + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV, + "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + default: + printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); + return; + } + + switch (id & KVM_REG_RISCV_TYPE_MASK) { + case KVM_REG_RISCV_CONFIG: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n", + reg_size, config_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_CORE: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n", + reg_size, core_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_CSR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n", + reg_size, csr_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_TIMER: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n", + reg_size, timer_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_FP_F: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n", + reg_size, fp_f_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_FP_D: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", + reg_size, fp_d_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_VECTOR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n", + reg_size, vector_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_ISA_EXT: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", + reg_size, isa_ext_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_SBI_EXT: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n", + reg_size, sbi_ext_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_SBI_STATE: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_STATE | %s,\n", + reg_size, sbi_id_to_str(prefix, id)); + break; + default: + printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,\n", + reg_size, id & ~REG_MASK); + return; + } +} + +/* + * The current blessed list was primed with the output of kernel version + * v6.5-rc3 and then later updated with new registers. + */ +static __u64 base_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(senvcfg), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), +}; + +/* + * The skips_set list registers that should skip set test. + * - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly. + */ +static __u64 base_skips_set[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), +}; + +static __u64 sbi_base_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR, +}; + +static __u64 sbi_sta_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi), +}; + +static __u64 sbi_fwft_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value), +}; + +static __u64 zicbom_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM, +}; + +static __u64 zicbop_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP, +}; + +static __u64 zicboz_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ, +}; + +static __u64 aia_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA, +}; + +static __u64 smstateen_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN, +}; + +static __u64 fp_f_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F, +}; + +static __u64 fp_d_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D, +}; + +/* Define a default vector registers with length. This will be overwritten at runtime */ +static __u64 vector_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V, +}; + +#define SUBLIST_BASE \ + {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ + .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} +#define SUBLIST_SBI_BASE \ + {"sbi-base", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_V01, \ + .regs = sbi_base_regs, .regs_n = ARRAY_SIZE(sbi_base_regs),} +#define SUBLIST_SBI_STA \ + {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \ + .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),} +#define SUBLIST_SBI_FWFT \ + {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \ + .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),} +#define SUBLIST_ZICBOM \ + {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define SUBLIST_ZICBOP \ + {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),} +#define SUBLIST_ZICBOZ \ + {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} +#define SUBLIST_AIA \ + {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),} +#define SUBLIST_SMSTATEEN \ + {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),} +#define SUBLIST_FP_F \ + {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \ + .regs_n = ARRAY_SIZE(fp_f_regs),} +#define SUBLIST_FP_D \ + {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ + .regs_n = ARRAY_SIZE(fp_d_regs),} + +#define SUBLIST_V \ + {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),} + +#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \ +static __u64 regs_##ext[] = { \ + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ + KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | \ + KVM_RISCV_ISA_EXT_##extu, \ +}; \ +static struct vcpu_reg_list config_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + { \ + .name = #ext, \ + .feature = KVM_RISCV_ISA_EXT_##extu, \ + .regs = regs_##ext, \ + .regs_n = ARRAY_SIZE(regs_##ext), \ + }, \ + {0}, \ + }, \ +} \ + +#define KVM_SBI_EXT_SIMPLE_CONFIG(ext, extu) \ +static __u64 regs_sbi_##ext[] = { \ + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ + KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | \ + KVM_RISCV_SBI_EXT_##extu, \ +}; \ +static struct vcpu_reg_list config_sbi_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + { \ + .name = "sbi-"#ext, \ + .feature_type = VCPU_FEATURE_SBI_EXT, \ + .feature = KVM_RISCV_SBI_EXT_##extu, \ + .regs = regs_sbi_##ext, \ + .regs_n = ARRAY_SIZE(regs_sbi_##ext), \ + }, \ + {0}, \ + }, \ +} \ + +#define KVM_ISA_EXT_SUBLIST_CONFIG(ext, extu) \ +static struct vcpu_reg_list config_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + SUBLIST_##extu, \ + {0}, \ + }, \ +} \ + +#define KVM_SBI_EXT_SUBLIST_CONFIG(ext, extu) \ +static struct vcpu_reg_list config_sbi_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + SUBLIST_SBI_##extu, \ + {0}, \ + }, \ +} \ + +/* Note: The below list is alphabetically sorted. */ + +KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE); +KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); +KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); +KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); +KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); +KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY); +KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT); + +KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); +KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); +KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); +KVM_ISA_EXT_SUBLIST_CONFIG(v, V); +KVM_ISA_EXT_SIMPLE_CONFIG(h, H); +KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); +KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); +KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); +KVM_ISA_EXT_SIMPLE_CONFIG(ssnpm, SSNPM); +KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); +KVM_ISA_EXT_SIMPLE_CONFIG(svade, SVADE); +KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); +KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); +KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); +KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); +KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO); +KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); +KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC); +KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); +KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); +KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); +KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); +KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); +KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); +KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); +KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); +KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP); +KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); +KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); +KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); +KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); +KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); +KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); +KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); +KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); +KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP); +KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); +KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); +KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH); +KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR); +KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED); +KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH); +KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); +KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO); +KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED); +KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA); +KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED); +KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT); + +struct vcpu_reg_list *vcpu_configs[] = { + &config_sbi_base, + &config_sbi_sta, + &config_sbi_pmu, + &config_sbi_dbcn, + &config_sbi_susp, + &config_sbi_mpxy, + &config_sbi_fwft, + &config_aia, + &config_fp_f, + &config_fp_d, + &config_h, + &config_v, + &config_smnpm, + &config_smstateen, + &config_sscofpmf, + &config_ssnpm, + &config_sstc, + &config_svade, + &config_svadu, + &config_svinval, + &config_svnapot, + &config_svpbmt, + &config_svvptc, + &config_zaamo, + &config_zabha, + &config_zacas, + &config_zalrsc, + &config_zawrs, + &config_zba, + &config_zbb, + &config_zbc, + &config_zbkb, + &config_zbkc, + &config_zbkx, + &config_zbs, + &config_zca, + &config_zcb, + &config_zcd, + &config_zcf, + &config_zcmop, + &config_zfa, + &config_zfbfmin, + &config_zfh, + &config_zfhmin, + &config_zicbom, + &config_zicbop, + &config_zicboz, + &config_ziccrse, + &config_zicntr, + &config_zicond, + &config_zicsr, + &config_zifencei, + &config_zihintntl, + &config_zihintpause, + &config_zihpm, + &config_zimop, + &config_zknd, + &config_zkne, + &config_zknh, + &config_zkr, + &config_zksed, + &config_zksh, + &config_zkt, + &config_ztso, + &config_zvbb, + &config_zvbc, + &config_zvfbfmin, + &config_zvfbfwma, + &config_zvfh, + &config_zvfhmin, + &config_zvkb, + &config_zvkg, + &config_zvkned, + &config_zvknha, + &config_zvknhb, + &config_zvksed, + &config_zvksh, + &config_zvkt, +}; +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c new file mode 100644 index 000000000000..924a335d2262 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -0,0 +1,731 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality. + * + * Copyright (c) 2024, Rivos Inc. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" +#include "sbi.h" +#include "arch_timer.h" +#include "ucall_common.h" + +/* Maximum counters(firmware + hardware) */ +#define RISCV_MAX_PMU_COUNTERS 64 +union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS]; + +/* Snapshot shared memory data */ +#define PMU_SNAPSHOT_GPA_BASE BIT(30) +static void *snapshot_gva; +static vm_paddr_t snapshot_gpa; + +static int vcpu_shared_irq_count; +static int counter_in_use; + +/* Cache the available counters in a bitmask */ +static unsigned long counter_mask_available; + +static bool illegal_handler_invoked; + +#define SBI_PMU_TEST_BASIC BIT(0) +#define SBI_PMU_TEST_EVENTS BIT(1) +#define SBI_PMU_TEST_SNAPSHOT BIT(2) +#define SBI_PMU_TEST_OVERFLOW BIT(3) + +#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5 +struct test_args { + int disabled_tests; + int overflow_irqnum; +}; + +static struct test_args targs; + +unsigned long pmu_csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + default : + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static inline void dummy_func_loop(uint64_t iter) +{ + int i = 0; + + while (i < iter) { + asm volatile("nop"); + i++; + } +} + +static void start_counter(unsigned long counter, unsigned long start_flags, + unsigned long ival) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags, + ival, 0, 0); + __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter); +} + +/* This should be invoked only for reset counter use case */ +static void stop_reset_counter(unsigned long counter, unsigned long stop_flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, + stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld\n", counter); +} + +static void stop_counter(unsigned long counter, unsigned long stop_flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags, + 0, 0, 0); + __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld error %ld\n", counter, ret.error); +} + +static void guest_illegal_exception_handler(struct pt_regs *regs) +{ + unsigned long insn; + int opcode, csr_num, funct3; + + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, + "Unexpected exception handler %lx\n", regs->cause); + + insn = regs->badaddr; + opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT; + __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM, + "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn); + + csr_num = GET_CSR_NUM(insn); + funct3 = GET_RM(insn); + /* Validate if it is a CSR read/write operation */ + __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4), + "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n", + funct3, csr_num); + + /* Validate if it is a HPMCOUNTER CSR operation */ + __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31), + "Unexpected csr_num 0x%x\n", csr_num); + + illegal_handler_invoked = true; + /* skip the trapping instruction */ + regs->epc += 4; +} + +static void guest_irq_handler(struct pt_regs *regs) +{ + unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + unsigned long overflown_mask; + + /* Validate that we are in the correct irq handler */ + GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF); + + /* Stop all counters first to avoid further interrupts */ + stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF)); + + overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask); + GUEST_ASSERT(overflown_mask & 0x01); + + WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1); +} + +static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask, + unsigned long cflags, + unsigned long event) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, + cflags, event, 0, 0); + __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error); + GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS); + GUEST_ASSERT(BIT(ret.value) & counter_mask_available); + + return ret.value; +} + +static unsigned long get_num_counters(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0); + + __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU"); + __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS, + "Invalid number of counters %ld\n", ret.value); + + return ret.value; +} + +static void update_counter_info(int num_counters) +{ + int i = 0; + struct sbiret ret; + + for (i = 0; i < num_counters; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); + + /* There can be gaps in logical counter indicies*/ + if (ret.error) + continue; + GUEST_ASSERT_NE(ret.value, 0); + + ctrinfo_arr[i].value = ret.value; + counter_mask_available |= BIT(i); + } + + GUEST_ASSERT(counter_mask_available > 0); +} + +static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0); + GUEST_ASSERT(ret.error == 0); + return ret.value; +} + +static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo) +{ + unsigned long counter_val = 0; + + __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type); + + if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) + counter_val = pmu_csr_read_num(ctrinfo.csr); + else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) + counter_val = read_fw_counter(idx, ctrinfo); + + return counter_val; +} + +static inline void verify_sbi_requirement_assert(void) +{ + long out_val = 0; + bool probe; + + probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + if (get_host_sbi_spec_version() < sbi_mk_version(2, 0)) + __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot"); +} + +static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags) +{ + unsigned long lo = (unsigned long)gpa; +#if __riscv_xlen == 32 + unsigned long hi = (unsigned long)(gpa >> 32); +#else + unsigned long hi = gpa == -1 ? -1 : 0; +#endif + struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + lo, hi, flags, 0, 0, 0); + + GUEST_ASSERT(ret.value == 0 && ret.error == 0); +} + +static void test_pmu_event(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_pre, counter_value_post; + unsigned long counter_init_value = 100; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + /* Do not set the initial value */ + start_counter(counter, 0, 0); + dummy_func_loop(10000); + stop_counter(counter, 0); + + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_post > counter_value_pre, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* + * We can't just update the counter without starting it. + * Do start/stop twice to simulate that by first initializing to a very + * high value and a low value after that. + */ + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2); + stop_counter(counter, 0); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value); + stop_counter(counter, 0); + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_pre > counter_value_post, + "Counter reinitialization verification failed : post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* Now set the initial value and compare */ + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value); + dummy_func_loop(10000); + stop_counter(counter, 0); + + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_post > counter_init_value, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_pmu_event_snapshot(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_pre, counter_value_post; + unsigned long counter_init_value = 100; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + /* Do not set the initial value */ + start_counter(counter, 0, 0); + dummy_func_loop(10000); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + /* The counter value is updated w.r.t relative index of cbase */ + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_post > counter_value_pre, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* + * We can't just update the counter without starting it. + * Do start/stop twice to simulate that by first initializing to a very + * high value and a low value after that. + */ + WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]); + + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_pre > counter_value_post, + "Counter reinitialization verification failed : post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* Now set the initial value and compare */ + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + dummy_func_loop(10000); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_post > counter_init_value, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_pmu_event_overflow(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_post; + unsigned long counter_init_value = ULONG_MAX - 10000; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_in_use = counter; + + /* The counter value is updated w.r.t relative index of cbase passed to start/stop */ + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + dummy_func_loop(10000); + udelay(msecs_to_usecs(2000)); + /* irq handler should have stopped the counter */ + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + /* The counter value after stopping should be less the init value due to overflow */ + __GUEST_ASSERT(counter_value_post < counter_init_value, + "counter_value_post %lx counter_init_value %lx for counter\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_invalid_event(void) +{ + struct sbiret ret; + unsigned long event = 0x1234; /* A random event */ + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0, + counter_mask_available, 0, event, 0, 0); + GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED); +} + +static void test_pmu_events(void) +{ + int num_counters = 0; + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* Sanity testing for any random invalid event */ + test_invalid_event(); + + /* Only these two events are guaranteed to be present */ + test_pmu_event(SBI_PMU_HW_CPU_CYCLES); + test_pmu_event(SBI_PMU_HW_INSTRUCTIONS); + + GUEST_DONE(); +} + +static void test_pmu_basic_sanity(void) +{ + long out_val = 0; + bool probe; + struct sbiret ret; + int num_counters = 0, i; + union sbi_pmu_ctr_info ctrinfo; + + probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + num_counters = get_num_counters(); + + for (i = 0; i < num_counters; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, + 0, 0, 0, 0, 0); + + /* There can be gaps in logical counter indicies*/ + if (ret.error) + continue; + GUEST_ASSERT_NE(ret.value, 0); + + ctrinfo.value = ret.value; + + /** + * Accessibility check of hardware and read capability of firmware counters. + * The spec doesn't mandate any initial value. No need to check any value. + */ + if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) { + pmu_csr_read_num(ctrinfo.csr); + GUEST_ASSERT(illegal_handler_invoked); + } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) { + read_fw_counter(i, ctrinfo); + } + } + + GUEST_DONE(); +} + +static void test_pmu_events_snaphost(void) +{ + int num_counters = 0; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + int i; + + /* Verify presence of SBI PMU and minimum requrired SBI version */ + verify_sbi_requirement_assert(); + + snapshot_set_shmem(snapshot_gpa, 0); + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* Validate shared memory access */ + GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0); + for (i = 0; i < num_counters; i++) { + if (counter_mask_available & (BIT(i))) + GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0); + } + /* Only these two events are guranteed to be present */ + test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES); + test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS); + + GUEST_DONE(); +} + +static void test_pmu_events_overflow(void) +{ + int num_counters = 0, i = 0; + + /* Verify presence of SBI PMU and minimum requrired SBI version */ + verify_sbi_requirement_assert(); + + snapshot_set_shmem(snapshot_gpa, 0); + csr_set(CSR_IE, BIT(IRQ_PMU_OVF)); + local_irq_enable(); + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* + * Qemu supports overflow for cycle/instruction. + * This test may fail on any platform that do not support overflow for these two events. + */ + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); + + vcpu_shared_irq_count = 0; + + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + case UCALL_SYNC: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } +} + +void test_vm_destroy(struct kvm_vm *vm) +{ + memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS); + counter_mask_available = 0; + kvm_vm_free(vm); +} + +static void test_vm_basic_test(void *guest_code) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + vm_init_vector_tables(vm); + /* Illegal instruction handler is required to verify read access without configuration */ + vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler); + + vcpu_init_vector_tables(vcpu); + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_events_test(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu = NULL; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + /* PMU Snapshot requires single page only */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0); + /* PMU_SNAPSHOT_GPA_BASE is identity mapped */ + virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1); + + snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE); + snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva); + sync_global_to_guest(vcpu->vm, snapshot_gva); + sync_global_to_guest(vcpu->vm, snapshot_gpa); +} + +static void test_vm_events_snapshot_test(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + + test_vm_setup_snapshot_mem(vm, vcpu); + + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_events_overflow(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + + __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF), + "Sscofpmf is not available, skipping overflow test"); + + test_vm_setup_snapshot_mem(vm, vcpu); + vm_init_vector_tables(vm); + vm_install_interrupt_handler(vm, guest_irq_handler); + + vcpu_init_vector_tables(vcpu); + /* Initialize guest timer frequency. */ + timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); + + /* Export the shared variables to the guest */ + sync_global_to_guest(vm, timer_freq); + sync_global_to_guest(vm, vcpu_shared_irq_count); + sync_global_to_guest(vm, targs); + + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n", + name); + pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n", + SBI_PMU_OVERFLOW_IRQNUM_DEFAULT); + pr_info("\t-h: print this help screen\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT | + SBI_PMU_TEST_OVERFLOW; + int overflow_interrupts = 0; + + while ((opt = getopt(argc, argv, "ht:n:")) != -1) { + switch (opt) { + case 't': + if (!strncmp("basic", optarg, 5)) + temp_disabled_tests &= ~SBI_PMU_TEST_BASIC; + else if (!strncmp("events", optarg, 6)) + temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS; + else if (!strncmp("snapshot", optarg, 8)) + temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT; + else if (!strncmp("overflow", optarg, 8)) + temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW; + else + goto done; + targs.disabled_tests = temp_disabled_tests; + break; + case 'n': + overflow_interrupts = atoi_positive("Number of LCOFI", optarg); + break; + case 'h': + default: + goto done; + } + } + + if (overflow_interrupts > 0) { + if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) { + pr_info("-n option is only available for overflow test\n"); + goto done; + } else { + targs.overflow_irqnum = overflow_interrupts; + } + } + + return true; +done: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + targs.disabled_tests = 0; + targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT; + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) { + test_vm_basic_test(test_pmu_basic_sanity); + pr_info("SBI PMU basic test : PASS\n"); + } + + if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) { + test_vm_events_test(test_pmu_events); + pr_info("SBI PMU event verification test : PASS\n"); + } + + if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { + test_vm_events_snapshot_test(test_pmu_events_snaphost); + pr_info("SBI PMU event verification with snapshot test : PASS\n"); + } + + if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) { + test_vm_events_overflow(test_pmu_events_overflow); + pr_info("SBI PMU event verification with overflow test : PASS\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index f74e76d03b7e..1375fca80bcd 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -1,5 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ + +/* + * Include rseq.c without _GNU_SOURCE defined, before including any headers, so + * that rseq.c is compiled with its configuration, not KVM selftests' config. + */ +#undef _GNU_SOURCE +#include "../rseq/rseq.c" +#define _GNU_SOURCE + #include <errno.h> #include <fcntl.h> #include <pthread.h> @@ -19,8 +27,7 @@ #include "kvm_util.h" #include "processor.h" #include "test_util.h" - -#include "../rseq/rseq.c" +#include "ucall_common.h" /* * Any bug related to task migration is likely to be timing-dependent; perform @@ -186,13 +193,38 @@ static void calc_min_max_cpu(void) "Only one usable CPU, task migration not possible"); } +static void help(const char *name) +{ + puts(""); + printf("usage: %s [-h] [-u] [-l latency]\n", name); + printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); + puts(""); + exit(0); +} + int main(int argc, char *argv[]) { - int r, i, snapshot; + int r, i, snapshot, opt, fd = -1, latency = -1; + bool skip_sanity_check = false; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; + while ((opt = getopt(argc, argv, "hl:u")) != -1) { + switch (opt) { + case 'u': + skip_sanity_check = true; + case 'l': + latency = atoi_paranoid(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); @@ -213,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -245,18 +291,29 @@ int main(int argc, char *argv[]) } while (snapshot != atomic_read(&seq_cnt)); TEST_ASSERT(rseq_cpu == cpu, - "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); + "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly * conservative ratio on x86-64, which can do _more_ KVM_RUNs than * migrations given the 1us+ delay in the migration task. + * + * Another reason why it may have small migration:KVM_RUN ratio is that, + * on systems with large low power mode wakeup latency, it may happen + * quite often that the scheduler is not able to wake up the target CPU + * before the vCPU thread is scheduled to another CPU. */ - TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), - "Only performed %d KVM_RUNs, task stalled too much?\n", i); + TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), + "Only performed %d KVM_RUNs, task stalled too much?\n\n" + " Try disabling deep sleep states to reduce CPU wakeup latency,\n" + " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index 1d73e78e8fa7..e39a724fe860 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -7,8 +7,6 @@ * Authors: * Nico Boehr <nrb@linux.ibm.com> */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -18,16 +16,18 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" +#include "processor.h" #define MAIN_PAGE_COUNT 512 #define TEST_DATA_PAGE_COUNT 512 #define TEST_DATA_MEMSLOT 1 -#define TEST_DATA_START_GFN 4096 +#define TEST_DATA_START_GFN PAGE_SIZE #define TEST_DATA_TWO_PAGE_COUNT 256 #define TEST_DATA_TWO_MEMSLOT 2 -#define TEST_DATA_TWO_START_GFN 8192 +#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; @@ -67,7 +67,7 @@ static void guest_dirty_test_data(void) " lghi 5,%[page_count]\n" /* r5 += r1 */ "2: agfr 5,1\n" - /* r2 = r1 << 12 */ + /* r2 = r1 << PAGE_SHIFT */ "1: sllg 2,1,12(0)\n" /* essa(r4, r2, SET_STABLE) */ " .insn rrf,0xb9ab0000,4,2,1,0\n" @@ -94,11 +94,6 @@ static void guest_dirty_test_data(void) ); } -static struct kvm_vm *create_vm(void) -{ - return ____vm_create(VM_MODE_DEFAULT); -} - static void create_main_memslot(struct kvm_vm *vm) { int i; @@ -150,14 +145,14 @@ static void finish_vm_setup(struct kvm_vm *vm) slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, 0); } static struct kvm_vm *create_vm_two_memslots(void) { struct kvm_vm *vm; - vm = create_vm(); + vm = vm_create_barebones(); create_memslots(vm); @@ -237,8 +232,8 @@ static void test_get_cmma_basic(void) /* GET_CMMA_BITS without CMMA enabled should fail */ rc = vm_get_cmma_bits(vm, 0, &errno_out); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno_out, ENXIO); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, ENXIO); enable_cmma(vm); vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); @@ -247,44 +242,44 @@ static void test_get_cmma_basic(void) /* GET_CMMA_BITS without migration mode and without peeking should fail */ rc = vm_get_cmma_bits(vm, 0, &errno_out); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno_out, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); /* GET_CMMA_BITS without migration mode and with peeking should work */ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); - ASSERT_EQ(rc, 0); - ASSERT_EQ(errno_out, 0); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(errno_out, 0); enable_dirty_tracking(vm); enable_migration_mode(vm); /* GET_CMMA_BITS with invalid flags */ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno_out, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); kvm_vm_free(vm); } static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) { - ASSERT_EQ(vcpu->run->exit_reason, 13); - ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); - ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); - ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); + TEST_ASSERT_EQ(vcpu->run->exit_reason, 13); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); } static void test_migration_mode(void) { - struct kvm_vm *vm = create_vm(); + struct kvm_vm *vm = vm_create_barebones(); struct kvm_vcpu *vcpu; u64 orig_psw; int rc; /* enabling migration mode on a VM without memory should fail */ rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); errno = 0; @@ -304,8 +299,8 @@ static void test_migration_mode(void) /* migration mode when memslots have dirty tracking off should fail */ rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); errno = 0; @@ -314,7 +309,7 @@ static void test_migration_mode(void) /* enabling migration mode should work now */ rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(rc, 0); TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); errno = 0; @@ -350,7 +345,7 @@ static void test_migration_mode(void) */ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(rc, 0); TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); errno = 0; @@ -394,9 +389,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) }; memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - ASSERT_EQ(args.count, MAIN_PAGE_COUNT); - ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); - ASSERT_EQ(args.start_gfn, 0); + TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT); + TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, 0); /* ...and then - after a hole - the TEST_DATA memslot should follow */ args = (struct kvm_s390_cmma_log){ @@ -407,9 +402,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) }; memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); - ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); - ASSERT_EQ(args.remaining, 0); + TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); + TEST_ASSERT_EQ(args.remaining, 0); /* ...and nothing else should be there */ args = (struct kvm_s390_cmma_log){ @@ -420,9 +415,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) }; memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - ASSERT_EQ(args.count, 0); - ASSERT_EQ(args.start_gfn, 0); - ASSERT_EQ(args.remaining, 0); + TEST_ASSERT_EQ(args.count, 0); + TEST_ASSERT_EQ(args.start_gfn, 0); + TEST_ASSERT_EQ(args.remaining, 0); } /** @@ -449,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) ); } -static void test_get_inital_dirty(void) +static void test_get_initial_dirty(void) { struct kvm_vm *vm = create_vm_two_memslots(); struct kvm_vcpu *vcpu; @@ -498,11 +493,11 @@ static void assert_cmma_dirty(u64 first_dirty_gfn, u64 dirty_gfn_count, const struct kvm_s390_cmma_log *res) { - ASSERT_EQ(res->start_gfn, first_dirty_gfn); - ASSERT_EQ(res->count, dirty_gfn_count); + TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn); + TEST_ASSERT_EQ(res->count, dirty_gfn_count); for (size_t i = 0; i < dirty_gfn_count; i++) - ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ - ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ + TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ + TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ } static void test_get_skip_holes(void) @@ -656,7 +651,7 @@ struct testdef { } testlist[] = { { "migration mode and dirty tracking", test_migration_mode }, { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, - { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty }, { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, }; @@ -670,7 +665,7 @@ struct testdef { */ static int machine_has_cmma(void) { - struct kvm_vm *vm = create_vm(); + struct kvm_vm *vm = vm_create_barebones(); int r; r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); diff --git a/tools/testing/selftests/kvm/s390/config b/tools/testing/selftests/kvm/s390/config new file mode 100644 index 000000000000..23270f2d679f --- /dev/null +++ b/tools/testing/selftests/kvm/s390/config @@ -0,0 +1,2 @@ +CONFIG_KVM=y +CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c new file mode 100644 index 000000000000..aded795d42be --- /dev/null +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those + * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction + * query data reported via the CPU Model, allowing us to directly compare it with the data + * acquired through executing the queries in the test. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include "facility.h" + +#include "kvm_util.h" + +#define PLO_FUNCTION_MAX 256 + +/* Query available CPU subfunctions */ +struct kvm_s390_vm_cpu_subfunc cpu_subfunc; + +static void get_cpu_machine_subfuntions(struct kvm_vm *vm, + struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) +{ + int r; + + r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); + + TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); +} + +static inline int plo_test_bit(unsigned char nr) +{ + unsigned long function = nr | 0x100; + int cc; + + asm volatile(" lgr 0,%[function]\n" + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : [function] "d" (function) + : "cc", "0"); + return cc == 0; +} + +/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ +static void test_plo_asm_block(u8 (*query)[32]) +{ + for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { + if (plo_test_bit(i)) + (*query)[i >> 3] |= 0x80 >> (i & 7); + } +} + +/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ +static void test_kmac_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb91e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ +static void test_kmc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92f0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ +static void test_km_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92e0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ +static void test_kimd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ +static void test_klmd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93f0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ +static void test_kmctr_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb92d0000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ +static void test_kmf_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92a0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ +static void test_kmo_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92b0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ +static void test_pcc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92c0000,0,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ +static void test_prno_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93c0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ +static void test_kma_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb9290000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ +static void test_kdsa_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93a0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ +static void test_sortl_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ +static void test_dfltcc_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* + * Testing Perform Function with Concurrent Results (PFCR) + * CPU subfunctions's ASM block + */ +static void test_pfcr_asm_block(u8 (*query)[16]) +{ + asm volatile(" lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]\n" + : [query] "=QS" (*query) + : + : "cc", "0"); +} + +typedef void (*testfunc_t)(u8 (*array)[]); + +struct testdef { + const char *subfunc_name; + u8 *subfunc_array; + size_t array_size; + testfunc_t test; + int facility_bit; +} testlist[] = { + /* + * PLO was introduced in the very first 64-bit machine generation. + * Hence it is assumed PLO is always installed in Z Arch. + */ + { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, + /* MSA - Facility bit 17 */ + { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, + { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, + { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, + { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, + { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, + /* MSA - Facility bit 77 */ + { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, + { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, + { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, + { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, + /* MSA5 - Facility bit 57 */ + { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, + /* MSA8 - Facility bit 146 */ + { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, + /* MSA9 - Facility bit 155 */ + { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, + /* SORTL - Facility bit 150 */ + { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, + /* DFLTCC - Facility bit 151 */ + { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, + /* Concurrent-function facility - Facility bit 201 */ + { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int idx; + + ksft_print_header(); + + vm = vm_create(1); + + memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); + get_cpu_machine_subfuntions(vm, &cpu_subfunc); + + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (test_facility(testlist[idx].facility_bit)) { + u8 *array = malloc(testlist[idx].array_size); + + testlist[idx].test((u8 (*)[testlist[idx].array_size])array); + + TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, + array, testlist[idx].array_size), 0); + + ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); + free(array); + } else { + ksft_test_result_skip("%s feature is not available\n", + testlist[idx].subfunc_name); + } + } + + kvm_vm_free(vm); + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c new file mode 100644 index 000000000000..ad8095968601 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/debug_test.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test KVM debugging features. */ +#include "kvm_util.h" +#include "test_util.h" +#include "sie.h" + +#include <linux/kvm.h> + +#define __LC_SVC_NEW_PSW 0x1c0 +#define __LC_PGM_NEW_PSW 0x1d0 +#define IPA0_DIAG 0x8300 +#define PGM_SPECIFICATION 0x06 + +/* Common code for testing single-stepping interruptions. */ +extern char int_handler[]; +asm("int_handler:\n" + "j .\n"); + +static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, + size_t new_psw_off, uint64_t *new_psw) +{ + struct kvm_guest_debug debug = {}; + struct kvm_regs regs; + struct kvm_vm *vm; + char *lowcore; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + lowcore = addr_gpa2hva(vm, 0); + new_psw[0] = (*vcpu)->run->psw_mask; + new_psw[1] = (uint64_t)int_handler; + memcpy(lowcore + new_psw_off, new_psw, 16); + vcpu_regs_get(*vcpu, ®s); + regs.gprs[2] = -1; + vcpu_regs_set(*vcpu, ®s); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + vcpu_guest_debug_set(*vcpu, &debug); + vcpu_run(*vcpu); + + return vm; +} + +static void test_step_int(void *guest_code, size_t new_psw_off) +{ + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* Test single-stepping "boring" program interruptions. */ +extern char test_step_pgm_guest_code[]; +asm("test_step_pgm_guest_code:\n" + ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n" + "j .\n"); + +static void test_step_pgm(void) +{ + test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by DIAG. + * Userspace emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_diag_guest_code[]; +asm("test_step_pgm_diag_guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test_step_pgm_diag(void) +{ + struct kvm_s390_irq irq = { + .type = KVM_S390_PROGRAM_INT, + .u.pgm.code = PGM_SPECIFICATION, + }; + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, + __LC_PGM_NEW_PSW, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); + vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* + * Test single-stepping program interruptions caused by ISKE. + * CPUSTAT_KSS handling must not interfere with single-stepping. + */ +extern char test_step_pgm_iske_guest_code[]; +asm("test_step_pgm_iske_guest_code:\n" + "iske %r2,%r2\n" + "j .\n"); + +static void test_step_pgm_iske(void) +{ + test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by LCTL. + * KVM emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_lctl_guest_code[]; +asm("test_step_pgm_lctl_guest_code:\n" + "lctl %c0,%c0,1\n" + "j .\n"); + +static void test_step_pgm_lctl(void) +{ + test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW); +} + +/* Test single-stepping supervisor-call interruptions. */ +extern char test_step_svc_guest_code[]; +asm("test_step_svc_guest_code:\n" + "svc 0\n" + "j .\n"); + +static void test_step_svc(void) +{ + test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW); +} + +/* Run all tests above. */ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "single-step pgm", test_step_pgm }, + { "single-step pgm caused by diag", test_step_pgm_diag }, + { "single-step pgm caused by iske", test_step_pgm_iske }, + { "single-step pgm caused by lctl", test_step_pgm_lctl }, + { "single-step svc", test_step_svc }, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 8e4b94d7b8dd..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -4,7 +4,6 @@ * * Copyright (C) 2019, Red Hat, Inc. */ - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -16,6 +15,8 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" +#include "processor.h" enum mop_target { LOGICAL, @@ -226,9 +227,6 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) @@ -279,10 +277,10 @@ enum stage { vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ if (uc.cmd == UCALL_ABORT) { \ - REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ + REPORT_GUEST_ASSERT(uc); \ } \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ }) \ static void prepare_mem12(void) @@ -376,6 +374,32 @@ static void test_copy(void) kvm_vm_free(t.kvm_vm); } +static void test_copy_access_register(void) +{ + struct test_default t = test_default_init(guest_copy); + + HOST_SYNC(t.vcpu, STAGE_INITED); + + prepare_mem12(); + t.run->psw_mask &= ~(3UL << (63 - 17)); + t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ + + /* + * Primary address space gets used if an access register + * contains zero. The host makes use of AR[1] so is a good + * candidate to ensure the guest AR (of zero) is used. + */ + CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, + GADDR_V(mem1), AR(1)); + HOST_SYNC(t.vcpu, STAGE_COPIED); + + CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size, + GADDR_V(mem2), AR(1)); + ASSERT_MEM_EQ(mem1, mem2, t.size); + + kvm_vm_free(t.kvm_vm); +} + static void set_storage_key_range(void *addr, size_t len, uint8_t key) { uintptr_t _addr, abs, i; @@ -469,7 +493,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) case 16: return val; } - GUEST_ASSERT_1(false, "Invalid size"); + GUEST_FAIL("Invalid size = %u", size); return 0; } @@ -490,6 +514,8 @@ static __uint128_t rotate(int size, __uint128_t val, int amount) amount = (amount + bits) % bits; val = cut_to_size(size, val); + if (!amount) + return val; return (val << (bits - amount)) | (val >> amount); } @@ -598,7 +624,7 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t return ret; } } - GUEST_ASSERT_1(false, "Invalid size"); + GUEST_FAIL("Invalid size = %u", size); return 0; } @@ -808,7 +834,7 @@ static void test_termination(void) HOST_SYNC(t.vcpu, STAGE_IDLED); MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ - ASSERT_EQ(teid & teid_mask, 0); + TEST_ASSERT_EQ(teid & teid_mask, 0); kvm_vm_free(t.kvm_vm); } @@ -1103,6 +1129,11 @@ int main(int argc, char *argv[]) .requirements_met = extension_cap > 0, }, { + .name = "copy with access register mode", + .test = test_copy_access_register, + .requirements_met = true, + }, + { .name = "error checks with key", .test = test_errors_key, .requirements_met = extension_cap > 0, diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c index e41e2cb8ffa9..b58f75b381e5 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) { uint64_t eval_reg; - vcpu_get_reg(vcpu, id, &eval_reg); + eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } @@ -78,7 +78,7 @@ static void assert_noirq(struct kvm_vcpu *vcpu) * (notably, the emergency call interrupt we have injected) should * be cleared by the resets, so this should be 0. */ - TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno); TEST_ASSERT(!irqs, "IRQ pending"); } @@ -199,7 +199,7 @@ static void inject_irq(struct kvm_vcpu *vcpu) irq->type = KVM_S390_INT_EMERGENCY; irq->u.emerg.code = vcpu->id; irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); - TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno); } static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c new file mode 100644 index 000000000000..bba0d9a6dcc8 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test shared zeropage handling (with/without storage keys) + * + * Copyright (C) 2024, Red Hat, Inc. + */ +#include <sys/mman.h> + +#include <linux/fs.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +static void set_storage_key(void *addr, uint8_t skey) +{ + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); +} + +static void guest_code(void) +{ + /* Issue some storage key instruction. */ + set_storage_key((void *)0, 0x98); + GUEST_DONE(); +} + +/* + * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped. + * Returns < 0 on error or if nothing is mapped. + */ +static int maps_shared_zeropage(int pagemap_fd, void *addr) +{ + struct page_region region; + struct pm_scan_arg arg = { + .start = (uintptr_t)addr, + .end = (uintptr_t)addr + 4096, + .vec = (uintptr_t)®ion, + .vec_len = 1, + .size = sizeof(struct pm_scan_arg), + .category_mask = PAGE_IS_PFNZERO, + .category_anyof_mask = PAGE_IS_PRESENT, + .return_mask = PAGE_IS_PFNZERO, + }; + return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); +} + +int main(int argc, char *argv[]) +{ + char *mem, *page0, *page1, *page2, tmp; + const size_t pagesize = getpagesize(); + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int pagemap_fd; + + ksft_print_header(); + ksft_set_plan(3); + + /* + * We'll use memory that is not mapped into the VM for simplicity. + * Shared zeropages are enabled/disabled per-process. + */ + mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + + /* Disable THP. Ignore errors on older kernels. */ + madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE); + + page0 = mem; + page1 = page0 + pagesize; + page2 = page1 + pagesize; + + /* Can we even detect shared zeropages? */ + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_REQUIRE(pagemap_fd >= 0); + + tmp = *page0; + asm volatile("" : "+r" (tmp)); + TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Verify that we get the shared zeropage after VM creation. */ + tmp = *page1; + asm volatile("" : "+r" (tmp)); + ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1, + "Shared zeropages should be enabled\n"); + + /* + * Let our VM execute a storage key instruction that should + * unshare all shared zeropages. + */ + vcpu_run(vcpu); + get_ucall(vcpu, &uc); + TEST_ASSERT_EQ(uc.cmd, UCALL_DONE); + + /* Verify that we don't have a shared zeropage anymore. */ + ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1), + "Shared zeropage should be gone\n"); + + /* Verify that we don't get any new shared zeropages. */ + tmp = *page2; + asm volatile("" : "+r" (tmp)); + ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2), + "Shared zeropages should be disabled\n"); + + kvm_vm_free(vm); + + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c index 636a70ddac1e..53def355ccba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c @@ -10,8 +10,6 @@ * * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -39,13 +37,13 @@ static void guest_code(void) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) #define REG_COMPARE32(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%x, 0x%x\n", \ + " values did not match: 0x%x, 0x%x", \ left->reg, right->reg) @@ -82,14 +80,14 @@ void test_read_invalid(struct kvm_vcpu *vcpu) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; } @@ -103,14 +101,14 @@ void test_set_invalid(struct kvm_vcpu *vcpu) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; } @@ -125,12 +123,12 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) /* Request and verify all valid register sets. */ run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && (run->s390_sieic.ipb >> 16) == 0x501, - "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -161,7 +159,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) } rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, "r11 sync regs value incorrect 0x%llx.", @@ -193,7 +191,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) run->s.regs.gprs[11] = 0xDEADBEEF; run->s.regs.diag318 = 0x4B1D; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, "r11 sync regs value incorrect 0x%llx.", diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index a9a0b76e5fa4..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -4,14 +4,13 @@ * * Copyright IBM Corp. 2021 */ - #include <sys/mman.h> #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" +#include "processor.h" -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) @@ -151,12 +150,14 @@ static enum stage perform_next_stage(int *i, bool mapped_0) * instead. * In order to skip these tests we detect this inside the guest */ - skip = tests[*i].addr < (void *)4096 && + skip = tests[*i].addr < (void *)PAGE_SIZE && tests[*i].expected != TRANSL_UNAVAIL && !mapped_0; if (!skip) { result = test_protection(tests[*i].addr, tests[*i].key); - GUEST_ASSERT_2(result == tests[*i].expected, *i, result); + __GUEST_ASSERT(result == tests[*i].expected, + "Wanted %u, got %u, for i = %u", + tests[*i].expected, result, *i); } } return stage; @@ -190,9 +191,9 @@ static void guest_code(void) vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ if (uc.cmd == UCALL_ABORT) \ - REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ + REPORT_GUEST_ASSERT(uc); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ }) #define HOST_SYNC(vcpu, stage) \ diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c new file mode 100644 index 000000000000..50bc1c38225a --- /dev/null +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -0,0 +1,798 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test code for the s390x kvm ucontrol interface + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ +#include "debug_print.h" +#include "kselftest_harness.h" +#include "kvm_util.h" +#include "processor.h" +#include "sie.h" + +#include <linux/capability.h> +#include <linux/sizes.h> + +#define PGM_SEGMENT_TRANSLATION 0x10 + +#define VM_MEM_SIZE (4 * SZ_1M) +#define VM_MEM_EXT_SIZE (2 * SZ_1M) +#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) + +/* so directly declare capget to check caps without libcap */ +int capget(cap_user_header_t header, cap_user_data_t data); + +/** + * In order to create user controlled virtual machines on S390, + * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL + * as privileged user (SYS_ADMIN). + */ +void require_ucontrol_admin(void) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + int rc; + + rc = capget(&hdr, data); + TEST_ASSERT_EQ(0, rc); + TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); +} + +/* Test program setting some registers and looping */ +extern char test_gprs_asm[]; +asm("test_gprs_asm:\n" + "xgr %r0, %r0\n" + "lgfi %r1,1\n" + "lgfi %r2,2\n" + "lgfi %r3,3\n" + "lgfi %r4,4\n" + "lgfi %r5,5\n" + "lgfi %r6,6\n" + "lgfi %r7,7\n" + "0:\n" + " diag 0,0,0x44\n" + " ahi %r0,1\n" + " j 0b\n" +); + +/* Test program manipulating memory */ +extern char test_mem_asm[]; +asm("test_mem_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " xgr %r1,%r1\n" + " l %r1,0(%r5,%r6)\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +/* Test program manipulating storage keys */ +extern char test_skey_asm[]; +asm("test_skey_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " sske %r1,%r6\n" + " xgr %r1,%r1\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " rrbe %r1,%r6\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +FIXTURE(uc_kvm) +{ + struct kvm_s390_sie_block *sie_block; + struct kvm_run *run; + uintptr_t base_gpa; + uintptr_t code_gpa; + uintptr_t base_hva; + uintptr_t code_hva; + int kvm_run_size; + vm_paddr_t pgd; + void *vm_mem; + int vcpu_fd; + int kvm_fd; + int vm_fd; +}; + +/** + * create VM with single vcpu, map kvm_run and SIE control block for easy access + */ +FIXTURE_SETUP(uc_kvm) +{ + struct kvm_s390_vm_cpu_processor info; + int rc; + + require_ucontrol_admin(); + + self->kvm_fd = open_kvm_dev_path_or_exit(); + self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(self->vm_fd, 0); + + kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info); + TH_LOG("create VM 0x%llx", info.cpuid); + + self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(self->vcpu_fd, 0); + + self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) + TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); + self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd); + /** + * For virtual cpus that have been created with S390 user controlled + * virtual machines, the resulting vcpu fd can be memory mapped at page + * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of + * the virtual cpu's hardware control block. + */ + self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd, + KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); + + TH_LOG("VM created %p %p", self->run, self->sie_block); + + self->base_gpa = 0; + self->code_gpa = self->base_gpa + (3 * SZ_1M); + + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); + ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); + self->base_hva = (uintptr_t)self->vm_mem; + self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; + struct kvm_s390_ucas_mapping map = { + .user_addr = self->base_hva, + .vcpu_addr = self->base_gpa, + .length = VM_MEM_SIZE, + }; + TH_LOG("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); + ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", + rc, strerror(errno)); + + TH_LOG("page in %p", (void *)self->base_gpa); + rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); + ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", + (void *)self->base_hva, rc, strerror(errno)); + + self->sie_block->cpuflags &= ~CPUSTAT_STOPPED; +} + +FIXTURE_TEARDOWN(uc_kvm) +{ + kvm_munmap(self->sie_block, PAGE_SIZE); + kvm_munmap(self->run, self->kvm_run_size); + close(self->vcpu_fd); + close(self->vm_fd); + close(self->kvm_fd); + free(self->vm_mem); +} + +TEST_F(uc_kvm, uc_sie_assertions) +{ + /* assert interception of Code 08 (Program Interruption) is set */ + EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); +} + +TEST_F(uc_kvm, uc_attr_mem_limit) +{ + u64 limit; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_MEM_CTRL, + .attr = KVM_S390_VM_MEM_LIMIT_SIZE, + .addr = (u64)&limit, + }; + int rc; + + rc = ioctl(self->vm_fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + EXPECT_EQ(~0UL, limit); + + /* assert set not supported */ + rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_no_dirty_log) +{ + struct kvm_dirty_log dlog; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/** + * Assert HPAGE CAP cannot be enabled on UCONTROL VM + */ +TEST(uc_cap_hpage) +{ + int rc, kvm_fd, vm_fd, vcpu_fd; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_S390_HPAGE_1M, + }; + + require_ucontrol_admin(); + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(vm_fd, 0); + + /* assert hpages are not supported on ucontrol vm */ + rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); + EXPECT_EQ(0, rc); + + /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */ + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); + + /* assert HPAGE CAP is rejected after vCPU creation */ + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(vcpu_fd, 0); + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBUSY, errno); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* calculate host virtual addr from guest physical addr */ +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +{ + return (void *)(self->base_hva - self->base_gpa + gpa); +} + +/* map / make additional memory available */ +static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); +} + +/* unmap previously mapped memory */ +static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas unmap %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); +} + +/* handle ucontrol exit by mapping the accessed segment */ +static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + u64 seg_addr; + int rc; + + TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + switch (run->s390_ucontrol.pgm_code) { + case PGM_SEGMENT_TRANSLATION: + seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); + pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", + run->s390_ucontrol.trans_exc_code, seg_addr); + /* map / make additional memory available */ + rc = uc_map_ext(self, seg_addr, SZ_1M); + TEST_ASSERT_EQ(0, rc); + break; + default: + TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* disable KSS */ + sie_block->cpuflags &= ~CPUSTAT_KSS; + /* disable skey inst interception */ + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + +/* + * Handle the instruction intercept + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + int ilen = insn_length(sie_block->ipa >> 8); + struct kvm_run *run = self->run; + + switch (run->s390_sieic.ipa) { + case 0xB229: /* ISKE */ + case 0xB22b: /* SSKE */ + case 0xB22a: /* RRBE */ + uc_skey_enable(self); + + /* rewind to reexecute intercepted instruction */ + run->psw_addr = run->psw_addr - ilen; + pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); + return true; + default: + return false; + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_run *run = self->run; + + /* check SIE interception code */ + pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", + run->s390_sieic.icptcode, + run->s390_sieic.ipa, + run->s390_sieic.ipb); + switch (run->s390_sieic.icptcode) { + case ICPT_INST: + /* end execution in caller on intercepted instruction */ + pr_info("sie instruction interception\n"); + return uc_handle_insn_ic(self); + case ICPT_KSS: + uc_skey_enable(self); + return true; + case ICPT_OPEREXC: + /* operation exception */ + TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); + default: + TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); + } + return true; +} + +/* verify VM state on exit */ +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + + switch (run->exit_reason) { + case KVM_EXIT_S390_UCONTROL: + /** check program interruption code + * handle page fault --> ucas map + */ + uc_handle_exit_ucontrol(self); + break; + case KVM_EXIT_S390_SIEIC: + return uc_handle_sieic(self); + default: + pr_info("exit_reason %2d not handled\n", run->exit_reason); + } + return true; +} + +/* run the VM until interrupted */ +static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) +{ + int rc; + + rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); + print_run(self->run, self->sie_block); + print_regs(self->run); + pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); + return rc; +} + +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* assert vm was interrupted by diag 0x0044 */ + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_ASSERT_EQ(0x8300, sie_block->ipa); + TEST_ASSERT_EQ(0x440000, sie_block->ipb); +} + +TEST_F(uc_kvm, uc_no_user_region) +{ + struct kvm_userspace_memory_region region = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + struct kvm_userspace_memory_region2 region2 = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION", + strerror(errno), errno); + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2", + strerror(errno), errno); +} + +TEST_F(uc_kvm, uc_map_unmap) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + const u64 disp = 1; + int rc; + + /* copy test_mem_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* set register content for test_mem_asm to access not mapped memory*/ + sync_regs->gprs[1] = 0x55; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = VM_MEM_SIZE + disp; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* run and expect to fail with ucontrol pic segment translation */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); + + /* fail to map memory with not segment aligned address */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); + ASSERT_GT(0, rc) + TH_LOG("ucas map for non segment address should fail but didn't; " + "result %d not expected, %s", rc, strerror(errno)); + + /* map / make additional memory available */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* assert registers and memory are in expected state */ + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(0x55, sync_regs->gprs[1]); + ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); + + /* unmap and run loop again */ + rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + /* handle ucontrol exit and remap memory after previous map and unmap */ + ASSERT_EQ(true, uc_handle_exit(self)); +} + +TEST_F(uc_kvm, uc_gprs) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + struct kvm_regs regs = {}; + + /* Set registers to values that are different from the ones that we expect below */ + for (int i = 0; i < 8; i++) + sync_regs->gprs[i] = 8; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* copy test_gprs_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* run and expect interception of diag 44 */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* Retrieve and check guest register values */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + for (int i = 0; i < 8; i++) { + ASSERT_EQ(i, regs.gprs[i]); + ASSERT_EQ(i, sync_regs->gprs[i]); + } + + /* run and expect interception of diag 44 again */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* check continued increment of register 0 value */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + ASSERT_EQ(1, regs.gprs[0]); + ASSERT_EQ(1, sync_regs->gprs[0]); +} + +TEST_F(uc_kvm, uc_skey) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + struct kvm_run *run = self->run; + const u8 skeyvalue = 0x34; + + /* copy test_skey_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE); + + /* set register content for test_skey_asm to access not mapped memory */ + sync_regs->gprs[1] = skeyvalue; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = test_vaddr; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(true, uc_handle_exit(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + + /* + * Bail out and skip the test after uc_skey_enable was executed but iske + * is still intercepted. Instructions are not handled by the kernel. + * Thus there is no need to test this here. + */ + TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); + TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_REQUIRE(sie_block->ipa != 0xb22b); + + /* SSKE + ISKE contd. */ + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* RRBE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + /* assert R reset but rest of skey unchanged */ + ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); + ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); + uc_assert_diag44(self); +} + +static char uc_flic_b[PAGE_SIZE]; +static struct kvm_s390_io_adapter uc_flic_ioa = { .id = 0 }; +static struct kvm_s390_io_adapter_req uc_flic_ioam = { .id = 0 }; +static struct kvm_s390_ais_req uc_flic_asim = { .isc = 0 }; +static struct kvm_s390_ais_all uc_flic_asima = { .simm = 0 }; +static struct uc_flic_attr_test { + char *name; + struct kvm_device_attr a; + int hasrc; + int geterrno; + int seterrno; +} uc_flic_attr_tests[] = { + { + .name = "KVM_DEV_FLIC_GET_ALL_IRQS", + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_GET_ALL_IRQS, + .addr = (u64)&uc_flic_b, + .attr = PAGE_SIZE, + }, + }, + { + .name = "KVM_DEV_FLIC_ENQUEUE", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_ENQUEUE, }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IRQS", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_CLEAR_IRQS, }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_REGISTER", + .geterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_REGISTER, + .addr = (u64)&uc_flic_ioa, + }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_MODIFY", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_MODIFY, + .addr = (u64)&uc_flic_ioam, + .attr = sizeof(uc_flic_ioam), + }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IO_IRQ", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_CLEAR_IO_IRQ, + .attr = 32, + }, + }, + { + .name = "KVM_DEV_FLIC_AISM", + .geterrno = EINVAL, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM, + .addr = (u64)&uc_flic_asim, + }, + }, + { + .name = "KVM_DEV_FLIC_AIRQ_INJECT", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_AIRQ_INJECT, }, + }, + { + .name = "KVM_DEV_FLIC_AISM_ALL", + .geterrno = ENOTSUP, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM_ALL, + .addr = (u64)&uc_flic_asima, + .attr = sizeof(uc_flic_asima), + }, + }, + { + .name = "KVM_DEV_FLIC_APF_ENABLE", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_ENABLE, }, + }, + { + .name = "KVM_DEV_FLIC_APF_DISABLE_WAIT", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_DISABLE_WAIT, }, + }, +}; + +TEST_F(uc_kvm, uc_flic_attrs) +{ + struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC }; + struct kvm_device_attr attr; + u64 value; + int rc, i; + + rc = ioctl(self->vm_fd, KVM_CREATE_DEVICE, &cd); + ASSERT_EQ(0, rc) TH_LOG("create device failed with err %s (%i)", + strerror(errno), errno); + + for (i = 0; i < ARRAY_SIZE(uc_flic_attr_tests); i++) { + TH_LOG("test %s", uc_flic_attr_tests[i].name); + attr = (struct kvm_device_attr) { + .group = uc_flic_attr_tests[i].a.group, + .attr = uc_flic_attr_tests[i].a.attr, + .addr = uc_flic_attr_tests[i].a.addr, + }; + if (attr.addr == 0) + attr.addr = (u64)&value; + + rc = ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(uc_flic_attr_tests[i].hasrc, !!rc) + TH_LOG("expected dev attr missing %s", + uc_flic_attr_tests[i].name); + + rc = ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].geterrno, !!rc) + TH_LOG("get dev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].geterrno) + EXPECT_EQ(uc_flic_attr_tests[i].geterrno, errno) + TH_LOG("get dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + + rc = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].seterrno, !!rc) + TH_LOG("set sev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].seterrno) + EXPECT_EQ(uc_flic_attr_tests[i].seterrno, errno) + TH_LOG("set dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + } + + close(cd.fd); +} + +TEST_F(uc_kvm, uc_set_gsi_routing) +{ + struct kvm_irq_routing *routing = kvm_gsi_routing_create(); + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + .u.adapter = (struct kvm_irq_routing_s390_adapter) { + .ind_addr = 0, + }, + }; + int rc; + + routing->entries[0] = ue; + routing->nr = 1; + rc = ioctl(self->vm_fd, KVM_SET_GSI_ROUTING, routing); + ASSERT_EQ(-1, rc) TH_LOG("err %s (%i)", strerror(errno), errno); + ASSERT_EQ(EINVAL, errno) TH_LOG("err %s (%i)", strerror(errno), errno); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c new file mode 100644 index 000000000000..714906c1d12a --- /dev/null +++ b/tools/testing/selftests/kvm/s390/user_operexec.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test operation exception forwarding. + * + * Copyright IBM Corp. 2025 + * + * Authors: + * Janosch Frank <frankja@linux.ibm.com> + */ +#include "kselftest.h" +#include "kvm_util.h" +#include "test_util.h" +#include "sie.h" + +#include <linux/kvm.h> + +static void guest_code_instr0(void) +{ + asm(".word 0x0000"); +} + +static void test_user_instr0(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); + + kvm_vm_free(vm); +} + +static void guest_code_user_operexec(void) +{ + asm(".word 0x0807"); +} + +static void test_user_operexec(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); + + /* + * Since user_operexec is the superset it can be used for the + * 0 instruction. + */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); + + kvm_vm_free(vm); +} + +/* combine user_instr0 and user_operexec */ +static void test_user_operexec_combined(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); + + /* Reverse enablement order */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); +} + +/* + * Run all tests above. + * + * Enablement after VCPU has been added is automatically tested since + * we enable the capability after VCPU creation. + */ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "instr0", test_user_instr0 }, + { "operexec", test_user_operexec }, + { "operexec_combined", test_user_operexec_combined}, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0)); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a849ce23ca97..7fe427ff9b38 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <pthread.h> #include <sched.h> @@ -18,9 +17,9 @@ #include <processor.h> /* - * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a - * 2MB sized and aligned region so that the initial region corresponds to - * exactly one large page. + * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB + * sized and aligned region so that the initial region corresponds to exactly + * one large page. */ #define MEM_REGION_SIZE 0x200000 @@ -88,7 +87,7 @@ static void *vcpu_worker(void *data) } if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) - REPORT_GUEST_ASSERT_1(uc, "val = %lu"); + REPORT_GUEST_ASSERT(uc); return NULL; } @@ -98,11 +97,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); /* Wait for the vCPU thread to reenter the guest. */ usleep(100000); @@ -156,24 +155,27 @@ static void guest_code_move_memory_region(void) * window where the memslot is invalid is usually quite small. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + __GUEST_ASSERT(val == 1 || val == MMIO_VAL, + "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val); /* Spin until the misaligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 1 || val == 0, val); + __GUEST_ASSERT(val == 1 || val == 0, + "Expected '0' or '1' (no MMIO), got '%lx'", val); /* Spin until the memory region starts to get re-aligned. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + __GUEST_ASSERT(val == 1 || val == MMIO_VAL, + "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val); /* Spin until the re-aligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 1, val); + GUEST_ASSERT_EQ(val, 1); GUEST_DONE(); } -static void test_move_memory_region(void) +static void test_move_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -182,6 +184,9 @@ static void test_move_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); /* @@ -218,21 +223,33 @@ static void test_move_memory_region(void) static void guest_code_delete_memory_region(void) { + struct desc_ptr idt; uint64_t val; + /* + * Clobber the IDT so that a #PF due to the memory region being deleted + * escalates to triple-fault shutdown. Because the memory region is + * deleted, there will be no valid mappings. As a result, KVM will + * repeatedly intercepts the state-2 page fault that occurs when trying + * to vector the guest's #PF. I.e. trying to actually handle the #PF + * in the guest will never succeed, and so isn't an option. + */ + memset(&idt, 0, sizeof(idt)); + set_idt(&idt); + GUEST_SYNC(0); /* Spin until the memory region is deleted. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == MMIO_VAL, val); + GUEST_ASSERT_EQ(val, MMIO_VAL); /* Spin until the memory region is recreated. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 0, val); + GUEST_ASSERT_EQ(val, 0); /* Spin until the memory region is deleted. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == MMIO_VAL, val); + GUEST_ASSERT_EQ(val, MMIO_VAL); asm("1:\n\t" ".pushsection .rodata\n\t" @@ -249,10 +266,10 @@ static void guest_code_delete_memory_region(void) "final_rip_end: .quad 1b\n\t" ".popsection"); - GUEST_ASSERT_1(0, 0); + GUEST_ASSERT(0); } -static void test_delete_memory_region(void) +static void test_delete_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -262,6 +279,9 @@ static void test_delete_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + /* Delete the memory region, the guest should not die. */ vm_mem_region_delete(vm, MEM_REGION_SLOT); wait_for_vcpu(); @@ -299,7 +319,7 @@ static void test_delete_memory_region(void) if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) TEST_ASSERT(regs.rip >= final_rip_start && regs.rip < final_rip_end, - "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n", + "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx", final_rip_start, final_rip_end, regs.rip); kvm_vm_free(vm); @@ -323,6 +343,65 @@ static void test_zero_memory_regions(void) } #endif /* __x86_64__ */ +static void test_invalid_memory_region_flags(void) +{ + uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES; + const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD; + struct kvm_vm *vm; + int r, i; + +#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__ + supported_flags |= KVM_MEM_READONLY; +#endif + +#ifdef __x86_64__ + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); + else +#endif + vm = vm_create_barebones(); + + if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE) + supported_flags |= KVM_MEM_GUEST_MEMFD; + + for (i = 0; i < 32; i++) { + if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i))) + continue; + + r = __vm_set_user_memory_region(vm, 0, BIT(i), + 0, MEM_REGION_SIZE, NULL); + + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i)); + + if (supported_flags & BIT(i)) + continue; + + r = __vm_set_user_memory_region2(vm, 0, BIT(i), + 0, MEM_REGION_SIZE, NULL, 0, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i)); + } + + if (supported_flags & KVM_MEM_GUEST_MEMFD) { + int guest_memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); + + r = __vm_set_user_memory_region2(vm, 0, + KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD, + 0, MEM_REGION_SIZE, NULL, guest_memfd, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported"); + + r = __vm_set_user_memory_region2(vm, 0, + KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD, + 0, MEM_REGION_SIZE, NULL, guest_memfd, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed, read-only GUEST_MEMFD memslots are unsupported"); + + close(guest_memfd); + } +} + /* * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any * tentative to add further slots should fail. @@ -354,10 +433,10 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + + mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); for (slot = 0; slot < max_mem_slots; slot++) @@ -367,9 +446,8 @@ static void test_add_max_memory_regions(void) mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ - mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); + mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, (uint64_t)max_mem_slots * MEM_REGION_SIZE, @@ -377,40 +455,200 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); - munmap(mem_extra, MEM_REGION_SIZE); + kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); + kvm_munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } + +#ifdef __x86_64__ +static void test_invalid_guest_memfd(struct kvm_vm *vm, int memfd, + size_t offset, const char *msg) +{ + int r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, + 0, memfd, offset); + TEST_ASSERT(r == -1 && errno == EINVAL, "%s", msg); +} + +static void test_add_private_memory_region(void) +{ + struct kvm_vm *vm, *vm2; + int memfd, i; + + pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n"); + + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); + + test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); + test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); + + memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false); + test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); + close(memfd); + + vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); + memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); + test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail"); + + vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); + close(memfd); + kvm_vm_free(vm2); + + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); + for (i = 1; i < PAGE_SIZE; i++) + test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should fail"); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); + close(memfd); + + kvm_vm_free(vm); +} + +static void test_add_overlapping_private_memory_regions(void) +{ + struct kvm_vm *vm; + int memfd; + int r; + + pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n"); + + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); + + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT + 1, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2, MEM_REGION_SIZE * 2, + 0, memfd, MEM_REGION_SIZE * 2); + + /* + * Delete the first memslot, and then attempt to recreate it except + * with a "bad" offset that results in overlap in the guest_memfd(). + */ + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, 0, NULL, -1, 0); + + /* Overlap the front half of the other slot. */ + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2 - MEM_REGION_SIZE, + MEM_REGION_SIZE * 2, + 0, memfd, 0); + TEST_ASSERT(r == -1 && errno == EEXIST, "%s", + "Overlapping guest_memfd() bindings should fail with EEXIST"); + + /* And now the back half of the other slot. */ + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2 + MEM_REGION_SIZE, + MEM_REGION_SIZE * 2, + 0, memfd, 0); + TEST_ASSERT(r == -1 && errno == EEXIST, "%s", + "Overlapping guest_memfd() bindings should fail with EEXIST"); + + close(memfd); + kvm_vm_free(vm); +} + +static void guest_code_mmio_during_vectoring(void) +{ + const struct desc_ptr idt_desc = { + .address = MEM_REGION_GPA, + .size = 0xFFF, + }; + + set_idt(&idt_desc); + + /* Generate a #GP by dereferencing a non-canonical address */ + *((uint8_t *)NONCANONICAL) = 0x1; + + GUEST_ASSERT(0); +} + +/* + * This test points the IDT descriptor base to an MMIO address. It should cause + * a KVM internal error when an event occurs in the guest. + */ +static void test_mmio_during_vectoring(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + u64 expected_gpa; + + pr_info("Testing MMIO during vectoring error handling\n"); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring); + virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1); + + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV, + "Unexpected suberror = %d", vcpu->run->internal.suberror); + TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d", + run->internal.ndata); + + /* The reported GPA should be IDT base + offset of the GP vector */ + expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry); + + TEST_ASSERT(run->internal.data[3] == expected_gpa, + "Unexpected GPA = %llx (expected %lx)", + vcpu->run->internal.data[3], expected_gpa); + + kvm_vm_free(vm); +} +#endif + int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; -#endif + int j, disable_slot_zap_quirk = 0; -#ifdef __x86_64__ + if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL) + disable_slot_zap_quirk = 1; /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. */ test_zero_memory_regions(); + test_mmio_during_vectoring(); #endif + test_invalid_memory_region_flags(); + test_add_max_memory_regions(); #ifdef __x86_64__ + if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) && + (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) { + test_add_private_memory_region(); + test_add_overlapping_private_memory_regions(); + } else { + pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n"); + } + if (argc > 1) loops = atoi_positive("Number of iterations", argv[1]); else loops = 10; - pr_info("Testing MOVE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_move_memory_region(); + for (j = 0; j <= disable_slot_zap_quirk; j++) { + pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_move_memory_region(!!j); - pr_info("Testing DELETE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_delete_memory_region(); + pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_delete_memory_region(!!j); + } #endif return 0; diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index c87f38712073..8edc1fca345b 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -4,18 +4,22 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE #include <stdio.h> #include <time.h> #include <sched.h> #include <pthread.h> #include <linux/kernel.h> #include <asm/kvm.h> +#ifdef __riscv +#include "sbi.h" +#else #include <asm/kvm_para.h> +#endif #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #define NR_VCPUS 4 #define ST_GPA_BASE (1 << 30) @@ -31,8 +35,8 @@ static uint64_t guest_stolen_time[NR_VCPUS]; static void check_status(struct kvm_steal_time *st) { GUEST_ASSERT(!(READ_ONCE(st->version) & 1)); - GUEST_ASSERT(READ_ONCE(st->flags) == 0); - GUEST_ASSERT(READ_ONCE(st->preempted) == 0); + GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0); } static void guest_code(int cpu) @@ -40,7 +44,7 @@ static void guest_code(int cpu) struct kvm_steal_time *st = st_gva[cpu]; uint32_t version; - GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); + GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); memset(st, 0, sizeof(*st)); GUEST_SYNC(0); @@ -81,20 +85,18 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); - int i; - pr_info("VCPU%d:\n", vcpu_idx); - pr_info(" steal: %lld\n", st->steal); - pr_info(" version: %d\n", st->version); - pr_info(" flags: %d\n", st->flags); - pr_info(" preempted: %d\n", st->preempted); - pr_info(" u8_pad: "); - for (i = 0; i < 3; ++i) - pr_info("%d", st->u8_pad[i]); - pr_info("\n pad: "); - for (i = 0; i < 11; ++i) - pr_info("%d", st->pad[i]); - pr_info("\n"); + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" steal: %lld\n", st->steal); + ksft_print_msg(" version: %d\n", st->version); + ksft_print_msg(" flags: %d\n", st->flags); + ksft_print_msg(" preempted: %d\n", st->preempted); + ksft_print_msg(" u8_pad: %d %d %d\n", + st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]); + ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n", + st->pad[0], st->pad[1], st->pad[2], st->pad[3], + st->pad[4], st->pad[5], st->pad[6], st->pad[7], + st->pad[8], st->pad[9], st->pad[10]); } #elif defined(__aarch64__) @@ -116,14 +118,14 @@ static int64_t smccc(uint32_t func, uint64_t arg) { struct arm_smccc_res res; - smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); + do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res); return res.a0; } static void check_status(struct st_time *st) { - GUEST_ASSERT(READ_ONCE(st->rev) == 0); - GUEST_ASSERT(READ_ONCE(st->attr) == 0); + GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0); } static void guest_code(int cpu) @@ -132,15 +134,15 @@ static void guest_code(int cpu) int64_t status; status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_FEATURES, PV_TIME_ST); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_ST, 0); - GUEST_ASSERT(status != -1); - GUEST_ASSERT(status == (ulong)st_gva[cpu]); + GUEST_ASSERT_NE(status, -1); + GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]); st = (struct st_time *)status; GUEST_SYNC(0); @@ -197,10 +199,106 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" rev: %d\n", st->rev); + ksft_print_msg(" attr: %d\n", st->attr); + ksft_print_msg(" st_time: %ld\n", st->st_time); +} + +#elif defined(__riscv) + +/* SBI STA shmem must have 64-byte alignment */ +#define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63) + +static vm_paddr_t st_gpa[NR_VCPUS]; + +struct sta_struct { + uint32_t sequence; + uint32_t flags; + uint64_t steal; + uint8_t preempted; + uint8_t pad[47]; +} __packed; + +static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags) +{ + unsigned long lo = (unsigned long)gpa; +#if __riscv_xlen == 32 + unsigned long hi = (unsigned long)(gpa >> 32); +#else + unsigned long hi = gpa == -1 ? -1 : 0; +#endif + struct sbiret ret = sbi_ecall(SBI_EXT_STA, 0, lo, hi, flags, 0, 0, 0); + + GUEST_ASSERT(ret.value == 0 && ret.error == 0); +} + +static void check_status(struct sta_struct *st) +{ + GUEST_ASSERT(!(READ_ONCE(st->sequence) & 1)); + GUEST_ASSERT(READ_ONCE(st->flags) == 0); + GUEST_ASSERT(READ_ONCE(st->preempted) == 0); +} + +static void guest_code(int cpu) +{ + struct sta_struct *st = st_gva[cpu]; + uint32_t sequence; + long out_val = 0; + bool probe; + + probe = guest_sbi_probe_extension(SBI_EXT_STA, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + sta_set_shmem(st_gpa[cpu], 0); + GUEST_SYNC(0); + + check_status(st); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + sequence = READ_ONCE(st->sequence); + check_status(st); + GUEST_SYNC(1); + + check_status(st); + GUEST_ASSERT(sequence < READ_ONCE(st->sequence)); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + check_status(st); + GUEST_DONE(); +} + +static bool is_steal_time_supported(struct kvm_vcpu *vcpu) +{ + uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); + unsigned long enabled = vcpu_get_reg(vcpu, id); + + TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); + + return enabled; +} + +static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +{ + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]); + sync_global_to_guest(vcpu->vm, st_gva[i]); + sync_global_to_guest(vcpu->vm, st_gpa[i]); +} + +static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +{ + struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); + int i; + pr_info("VCPU%d:\n", vcpu_idx); - pr_info(" rev: %d\n", st->rev); - pr_info(" attr: %d\n", st->attr); - pr_info(" st_time: %ld\n", st->st_time); + pr_info(" sequence: %d\n", st->sequence); + pr_info(" flags: %d\n", st->flags); + pr_info(" steal: %"PRIu64"\n", st->steal); + pr_info(" preempted: %d\n", st->preempted); + pr_info(" pad: "); + for (i = 0; i < 47; ++i) + pr_info("%d", st->pad[i]); + pr_info("\n"); } #endif @@ -267,7 +365,9 @@ int main(int ac, char **av) vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); + ksft_print_header(); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); + ksft_set_plan(NR_VCPUS); /* Run test on each VCPU */ for (i = 0; i < NR_VCPUS; ++i) { @@ -308,14 +408,15 @@ int main(int ac, char **av) run_delay, stolen_time); if (verbose) { - pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i, - guest_stolen_time[i], stolen_time); - if (stolen_time == run_delay) - pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)"); - pr_info("\n"); + ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n", + i, guest_stolen_time[i], stolen_time, + stolen_time == run_delay ? + " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : ""); steal_time_dump(vm, i); } + ksft_test_result_pass("vcpu%d\n", i); } - return 0; + /* Print results and exit() accordingly */ + ksft_finished(); } diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 7f5b330b6a1b..513d421a9bff 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -108,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall %ld\n", + TEST_ASSERT(0, "unhandled ucall %ld", get_ucall(vcpu, &uc)); } } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index 11329e5ff945..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -6,8 +6,6 @@ * * Tests for amx #NM exception and save/restore. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -88,6 +86,8 @@ static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) static void check_xtile_info(void) { + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); @@ -124,29 +124,12 @@ static void set_tilecfg(struct tile_config *cfg) } } -static void init_regs(void) -{ - uint64_t cr4, xcr0; - - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xcr0 = xgetbv(0); - xcr0 |= XFEATURE_MASK_XTILE; - xsetbv(0x0, xcr0); - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); -} - static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { - init_regs(); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); GUEST_SYNC(1); @@ -221,7 +204,7 @@ int main(int argc, char *argv[]) vm_vaddr_t amx_cfg, tiledata, xstate; struct ucall uc; u32 amx_offset; - int stage, ret; + int ret; /* * Note, all off-by-default features must be enabled before anything @@ -246,8 +229,6 @@ int main(int argc, char *argv[]) vcpu_regs_get(vcpu, ®s1); /* Register #NM handler */ - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); /* amx cfg for guest_code */ @@ -263,7 +244,7 @@ int main(int argc, char *argv[]) memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); - for (stage = 1; ; stage++) { + for (;;) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -296,7 +277,7 @@ int main(int argc, char *argv[]) void *tiles_data = (void *)addr_gva2hva(vm, tiledata); /* Only check TMM0 register, 1 tile */ ret = memcmp(amx_start, tiles_data, TILE_SIZE); - TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); kvm_x86_state_cleanup(state); break; case 9: diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..8b15a13df939 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs. + */ + +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <asm/msr-index.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +{ + uint64_t data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel. + */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *nested_test_data) +{ + guest_read_aperf_mperf(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(nested_test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(nested_test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +static void guest_no_aperfmperf(void) +{ + uint64_t msr_val; + uint8_t vector; + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + uint64_t host_aperf_before, host_mperf_before; + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int msr_fd, cpu, i; + + /* Sanity check that APERF/MPERF are unsupported by default. */ + vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + kvm_vm_free(vm); + + cpu = pin_self_to_any_cpu(); + + msr_fd = open_dev_msr(cpu); + + /* + * This test requires a non-standard VM initialization, because + * KVM_ENABLE_CAP cannot be used on a VM file descriptor after + * a VCPU has been created. + */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + uint64_t host_aperf_after, host_mperf_after; + uint64_t guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c new file mode 100644 index 000000000000..f8916bb34405 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Intel Corporation + * + * Verify KVM correctly emulates the APIC bus frequency when the VMM configures + * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by + * programming TMICT (timer initial count) to the largest value possible (so + * that the timer will not expire during the test). Then, after an arbitrary + * amount of time has elapsed, verify TMCCT (timer current count) is within 1% + * of the expected value based on the time elapsed, the APIC bus frequency, and + * the programmed TDCR (timer divide configuration register). + */ + +#include "apic.h" +#include "test_util.h" + +/* + * Possible TDCR values with matching divide count. Used to modify APIC + * timer frequency. + */ +static const struct { + const uint32_t tdcr; + const uint32_t divide_count; +} tdcrs[] = { + {0x0, 2}, + {0x1, 4}, + {0x2, 8}, + {0x3, 16}, + {0x8, 32}, + {0x9, 64}, + {0xa, 128}, + {0xb, 1}, +}; + +static bool is_x2apic; + +static void apic_enable(void) +{ + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); +} + +static uint32_t apic_read_reg(unsigned int reg) +{ + return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); +} + +static void apic_write_reg(unsigned int reg, uint32_t val) +{ + if (is_x2apic) + x2apic_write_reg(reg, val); + else + xapic_write_reg(reg, val); +} + +static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +{ + uint64_t tsc_hz = guest_tsc_khz * 1000; + const uint32_t tmict = ~0u; + uint64_t tsc0, tsc1, freq; + uint32_t tmcct; + int i; + + apic_enable(); + + /* + * Setup one-shot timer. The vector does not matter because the + * interrupt should not fire. + */ + apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED); + + for (i = 0; i < ARRAY_SIZE(tdcrs); i++) { + apic_write_reg(APIC_TDCR, tdcrs[i].tdcr); + apic_write_reg(APIC_TMICT, tmict); + + tsc0 = rdtsc(); + udelay(delay_ms * 1000); + tmcct = apic_read_reg(APIC_TMCCT); + tsc1 = rdtsc(); + + /* + * Stop the timer _after_ reading the current, final count, as + * writing the initial counter also modifies the current count. + */ + apic_write_reg(APIC_TMICT, 0); + + freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0); + /* Check if measured frequency is within 5% of configured frequency. */ + __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100, + "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu", + freq, apic_hz * 95 / 100, apic_hz * 105 / 100, + apic_hz, tdcrs[i].divide_count, tsc_hz); + } + + GUEST_DONE(); +} + +static void test_apic_bus_clock(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } + } +} + +static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, + bool x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int ret; + + is_x2apic = x2apic; + + vm = vm_create(1); + + sync_global_to_guest(vm, is_x2apic); + + vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); + vcpu_args_set(vcpu, 2, apic_hz, delay_ms); + + ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + TEST_ASSERT(ret < 0 && errno == EINVAL, + "Setting of APIC bus frequency after vCPU is created should fail."); + + if (!is_x2apic) + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + test_apic_bus_clock(vcpu); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name); + puts(""); + printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n"); + printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + /* + * Arbitrarilty default to 25MHz for the APIC bus frequency, which is + * different enough from the default 1GHz to be interesting. + */ + uint64_t apic_hz = 25 * 1000 * 1000; + uint64_t delay_ms = 100; + int opt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); + + while ((opt = getopt(argc, argv, "d:f:h")) != -1) { + switch (opt) { + case 'f': + apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000; + break; + case 'd': + delay_ms = atoi_positive("Delay in milliseconds", optarg); + break; + case 'h': + default: + help(argv[0]); + exit(KSFT_SKIP); + } + } + + run_apic_bus_clock_test(apic_hz, delay_ms, false); + run_apic_bus_clock_test(apic_hz, delay_ms, true); +} diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index d3c3aa93f090..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -12,17 +12,16 @@ #include "kvm_util.h" #include "processor.h" -/* CPUIDs known to differ */ -struct { - u32 function; - u32 index; -} mangled_cpuids[] = { - /* - * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR, - * which are not controlled for by this test. - */ - {.function = 0xd, .index = 0}, - {.function = 0xd, .index = 1}, +struct cpuid_mask { + union { + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; + u32 regs[4]; + }; }; static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) @@ -35,10 +34,10 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) guest_cpuid->entries[i].index, &eax, &ebx, &ecx, &edx); - GUEST_ASSERT(eax == guest_cpuid->entries[i].eax && - ebx == guest_cpuid->entries[i].ebx && - ecx == guest_cpuid->entries[i].ecx && - edx == guest_cpuid->entries[i].edx); + GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax); + GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx); + GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx); + GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx); } } @@ -51,22 +50,34 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); + GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001); GUEST_DONE(); } -static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie) +static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) { - int i; - - for (i = 0; i < sizeof(mangled_cpuids); i++) { - if (mangled_cpuids[i].function == entrie->function && - mangled_cpuids[i].index == entrie->index) - return true; + struct cpuid_mask mask; + + memset(&mask, 0xff, sizeof(mask)); + + switch (entry->function) { + case 0x1: + mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); + break; + case 0x7: + mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); + break; + case 0xd: + /* + * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current + * XCR0 and IA32_XSS MSR values. + */ + if (entry->index < 2) + mask.ebx = 0; + break; } - - return false; + return mask; } static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, @@ -79,24 +90,30 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); for (i = 0; i < cpuid1->nent; i++) { + struct cpuid_mask mask; + e1 = &cpuid1->entries[i]; e2 = &cpuid2->entries[i]; TEST_ASSERT(e1->function == e2->function && e1->index == e2->index && e1->flags == e2->flags, - "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n", + "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x", i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); - if (is_cpuid_mangled(e1)) - continue; + /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */ + mask = get_const_cpuid_mask(e1); - TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx && - e1->ecx == e2->ecx && e1->edx == e2->edx, + TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && + (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && + (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && + (e1->edx & mask.edx) == (e2->edx & mask.edx), "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", e1->function, e1->index, - e1->eax, e1->ebx, e1->ecx, e1->edx, - e2->eax, e2->ebx, e2->ecx, e2->edx); + e1->eax & mask.eax, e1->ebx & mask.ebx, + e1->ecx & mask.ecx, e1->edx & mask.edx, + e2->eax & mask.eax, e2->ebx & mask.ebx, + e2->ecx & mask.ecx, e2->edx & mask.edx); } } @@ -116,7 +133,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) case UCALL_DONE: return; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); @@ -170,7 +187,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, - "KVM didn't update nent on success, wanted %u, got %u\n", + "KVM didn't update nent on success, wanted %u, got %u", vcpu->cpuid->nent, cpuid->nent); for (i = 0; i < vcpu->cpuid->nent; i++) { diff --git a/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c new file mode 100644 index 000000000000..28cc66454601 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CR4 and CPUID sync test + * + * Copyright 2018, Red Hat, Inc. and/or its affiliates. + * + * Author: + * Wei Huang <wei@redhat.com> + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "processor.h" + +#define MAGIC_HYPERCALL_PORT 0x80 + +static void guest_code(void) +{ + u32 regs[4] = { + [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, + [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, + }; + + /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ + GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); + + /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + /* + * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output, + * and then restore CR4. Do this all in assembly to ensure no AVX + * instructions are executed while OSXSAVE=0. + */ + asm volatile ( + "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" + "cpuid\n\t" + "mov %%rdi, %%cr4\n\t" + : "+a" (regs[KVM_CPUID_EAX]), + "=b" (regs[KVM_CPUID_EBX]), + "+c" (regs[KVM_CPUID_ECX]), + "=d" (regs[KVM_CPUID_EDX]) + : "D" (get_cr4()) + ); + + /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ + GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); + + /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_sregs sregs; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (1) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && + vcpu->run->io.direction == KVM_EXIT_IO_OUT) { + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vcpu, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vcpu, &sregs); + continue; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index f6b295e0b2d2..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c @@ -47,15 +47,18 @@ static void guest_code(void) /* * Single step test, covers 2 basic instructions and 2 emulated * - * Enable interrupts during the single stepping to see that - * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ + * Enable interrupts during the single stepping to see that pending + * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ. + * + * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler + * exits to userspace due to single-step being enabled. */ asm volatile("ss_start: " "sti\n\t" "xor %%eax,%%eax\n\t" "cpuid\n\t" - "movl $0x1a0,%%ecx\n\t" - "rdmsr\n\t" + "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" + "wrmsr\n\t" "cli\n\t" : : : "eax", "ebx", "ecx", "edx"); @@ -163,7 +166,7 @@ int main(void) /* Test single step */ target_rip = CAST_TO_RIP(ss_start); target_dr6 = 0xffff4ff0ULL; - for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { + for (i = 0; i < ARRAY_SIZE(ss_size); i++) { target_rip += ss_size[i]; memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index beb7e2c10211..b0d2b04a7ff2 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -17,6 +17,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #define VCPUS 2 #define SLOTS 2 @@ -40,9 +41,9 @@ struct kvm_page_stats { static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) { - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->pages_4k = vm_get_stat(vm, pages_4k); + stats->pages_2m = vm_get_stat(vm, pages_2m); + stats->pages_1g = vm_get_stat(vm, pages_1g); stats->hugepages = stats->pages_2m + stats->pages_1g; pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", @@ -72,7 +73,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); vcpu_last_completed_iteration[vcpu_idx] = current_iteration; @@ -92,7 +93,6 @@ static void run_test(enum vm_guest_mode mode, void *unused) uint64_t host_num_pages; uint64_t pages_per_slot; int i; - uint64_t total_4k_pages; struct kvm_page_stats stats_populated; struct kvm_page_stats stats_dirty_logging_enabled; struct kvm_page_stats stats_dirty_pass[ITERATIONS]; @@ -107,6 +107,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / SLOTS; + TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS); + TEST_ASSERT(!(host_num_pages % 512), + "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages); bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); @@ -165,10 +168,8 @@ static void run_test(enum vm_guest_mode mode, void *unused) memstress_free_bitmaps(bitmaps, SLOTS); memstress_destroy_vm(vm); - /* Make assertions about the page counts. */ - total_4k_pages = stats_populated.pages_4k; - total_4k_pages += stats_populated.pages_2m * 512; - total_4k_pages += stats_populated.pages_1g * 512 * 512; + TEST_ASSERT_EQ((stats_populated.pages_2m * 512 + + stats_populated.pages_1g * 512 * 512), host_num_pages); /* * Check that all huge pages were split. Since large pages can only @@ -179,22 +180,25 @@ static void run_test(enum vm_guest_mode mode, void *unused) * with that capability. */ if (dirty_log_manual_caps) { - ASSERT_EQ(stats_clear_pass[0].hugepages, 0); - ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); - ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); + TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); + TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_clear_pass[0].pages_4k); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); } else { - ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); - ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); + TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_dirty_logging_enabled.pages_4k); } /* * Once dirty logging is disabled and the vCPUs have touched all their - * memory again, the page counts should be the same as they were + * memory again, the hugepage counts should be the same as they were * right after initial population of memory. */ - ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); - ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); - ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); + TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); + TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c index e334844d6e1d..81055476d394 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c @@ -4,12 +4,9 @@ * * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "flds_emulation.h" - #include "test_util.h" +#include "ucall_common.h" #define MMIO_GPA 0x700000000 #define MMIO_GVA MMIO_GPA @@ -35,7 +32,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); handle_flds_emulation_failure_exit(vcpu); vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c new file mode 100644 index 000000000000..8926cfe0e209 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* + * Execute a fastop() instruction, with or without forced emulation. BT bit 0 + * to set RFLAGS.CF based on whether or not the input is even or odd, so that + * instructions like ADC and SBB are deterministic. + */ +#define fastop(__insn) \ + "bt $0, %[bt_val]\n\t" \ + __insn "\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" + +#define flags_constraint(flags_val) [flags]"=r"(flags_val) +#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val) + +#define guest_execute_fastop_1(FEP, insn, __val, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %[val]") \ + : [val]"+r"(__val), flags_constraint(__flags) \ + : bt_constraint(__val) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_1(insn, type_t, __val) \ +({ \ + type_t val = __val, ex_val = __val, input = __val; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_1("", insn, ex_val, ex_flags); \ + guest_execute_fastop_1(KVM_FEP, insn, val, flags); \ + \ + __GUEST_ASSERT(val == ex_val, \ + "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \ + (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)input, flags); \ +}) + +#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : [input]"r"(__input), bt_constraint(__output) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_2(insn, type_t, __val1, __val2) \ +({ \ + type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \ + guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \ + (uint64_t)ex_output, insn, (uint64_t)input, \ + (uint64_t)input2, (uint64_t)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \ +}) + +#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : "c"(__shift), bt_constraint(__output) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ +({ \ + type_t output = __val2, ex_output = __val2, input = __val2; \ + uint8_t shift = __val1; \ + uint64_t flags, ex_flags; \ + \ + guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \ + guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + (uint64_t)ex_output, insn, shift, (uint64_t)input, \ + (uint64_t)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + ex_flags, insn, shift, (uint64_t)input, flags); \ +}) + +#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ + : "+a"(__a), "+d"(__d), flags_constraint(__flags), \ + KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : [denom]"rm"(__rm), bt_constraint(__rm) \ + : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define guest_test_fastop_div(insn, type_t, __val1, __val2) \ +({ \ + type_t _a = __val1, _d = __val1, rm = __val2; \ + type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \ + uint64_t flags, ex_flags; \ + uint8_t v, ex_v; \ + \ + ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ + v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \ + \ + GUEST_ASSERT_EQ(v, ex_v); \ + __GUEST_ASSERT(v == ex_v, \ + "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \ + ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \ + __GUEST_ASSERT(a == ex_a && d == ex_d, \ + "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\ + (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \ + (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \ + __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \ + "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\ +}) + +static const uint64_t vals[] = { + 0, + 1, + 2, + 4, + 7, + 0x5555555555555555, + 0xaaaaaaaaaaaaaaaa, + 0xfefefefefefefefe, + 0xffffffffffffffff, +}; + +#define guest_test_fastops(type_t, suffix) \ +do { \ + int i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(vals); i++) { \ + guest_test_fastop_1("dec" suffix, type_t, vals[i]); \ + guest_test_fastop_1("inc" suffix, type_t, vals[i]); \ + guest_test_fastop_1("neg" suffix, type_t, vals[i]); \ + guest_test_fastop_1("not" suffix, type_t, vals[i]); \ + \ + for (j = 0; j < ARRAY_SIZE(vals); j++) { \ + guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \ +if (sizeof(type_t) != 1) { \ + guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \ +} \ + guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \ + } \ + } \ +} while (0) + +static void guest_code(void) +{ + guest_test_fastops(uint8_t, "b"); + guest_test_fastops(uint16_t, "w"); + guest_test_fastops(uint32_t, "l"); + guest_test_fastops(uint64_t, "q"); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. + */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? "" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index 0f728f05ea82..762628f7d4ba 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c @@ -9,6 +9,7 @@ #include <linux/stringify.h> #include <stdint.h> +#include "kvm_test_harness.h" #include "apic.h" #include "test_util.h" #include "kvm_util.h" @@ -83,6 +84,8 @@ static void guest_main(void) GUEST_DONE(); } +KVM_ONE_VCPU_TEST_SUITE(fix_hypercall); + static void enter_guest(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -103,15 +106,10 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } -static void test_fix_hypercall(bool disable_quirk) +static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(&vcpu, guest_main); + struct kvm_vm *vm = vcpu->vm; - vm_init_descriptor_tables(vcpu->vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); if (disable_quirk) @@ -126,10 +124,19 @@ static void test_fix_hypercall(bool disable_quirk) enter_guest(vcpu); } -int main(void) +KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main) +{ + test_fix_hypercall(vcpu, false); +} + +KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main) +{ + test_fix_hypercall(vcpu, true); +} + +int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN); - test_fix_hypercall(false); - test_fix_hypercall(true); + return test_harness_run(argc, argv); } diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index 0a1573d52882..37b1a9f52864 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h @@ -41,7 +41,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) insn_bytes = run->emulation_failure.insn_bytes; TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, - "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x", insn_bytes[0], insn_bytes[1]); vcpu_regs_get(vcpu, ®s); diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c new file mode 100644 index 000000000000..10b1b0ba374e --- /dev/null +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023, Google LLC. + */ +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit) +{ + const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); + const uint64_t valid = BIT_ULL(18) | BIT_ULL(24); + const uint64_t legal = ignored | valid; + uint64_t val = BIT_ULL(bit); + uint64_t actual; + int r; + + r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val); + TEST_ASSERT(val & ~legal ? !r : r == 1, + "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s", + val, val & ~legal ? "fail" : "succeed"); + + actual = vcpu_get_msr(vcpu, MSR_K7_HWCR); + TEST_ASSERT(actual == (val & valid), + "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx", + bit, actual, (val & valid)); + + vcpu_set_msr(vcpu, MSR_K7_HWCR, 0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + unsigned int bit; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + for (bit = 0; bit < BITS_PER_LONG; bit++) + test_hwcr_bit(vcpu, bit); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index f25749eaa6a8..e058bc676cd6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c @@ -211,6 +211,9 @@ int main(void) vm_vaddr_t tsc_page_gva; int stage; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); vcpu_set_hv_cpuid(vcpu); @@ -218,7 +221,7 @@ int main(void) tsc_page_gva = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, - "TSC page has to be page aligned\n"); + "TSC page has to be page aligned"); vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); host_check_tsc_msr_rdtsc(vcpu); @@ -235,7 +238,7 @@ int main(void) break; case UCALL_DONE: /* Keep in sync with guest_main() */ - TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d", stage); goto out; default: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 5c27efbf405e..3c21af811d8f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -7,8 +7,6 @@ * This work is licensed under the terms of the GNU GPL, version 2. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -24,32 +22,19 @@ static void guest_code(void) { } -static bool smt_possible(void) -{ - char buf[16]; - FILE *f; - bool res = true; - - f = fopen("/sys/devices/system/cpu/smt/control", "r"); - if (f) { - if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { - if (!strncmp(buf, "forceoff", 8) || - !strncmp(buf, "notsupported", 12)) - res = false; - } - fclose(f); - } - - return res; -} - -static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_expected) +static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { + const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; + const struct kvm_cpuid2 *hv_cpuid_entries; int i; int nent_expected = 10; u32 test_val; + if (vcpu) + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + else + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); + TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " (returned %d)", @@ -60,7 +45,7 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x40000082), - "function %x is our of supported range", + "function %x is out of supported range", entry->function); TEST_ASSERT(entry->index == 0, @@ -82,12 +67,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, entry->eax, evmcs_expected ); break; + case 0x40000003: + TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)), + "\"Direct\" Synthetic Timers should require in-kernel APIC"); + break; case 0x40000004: test_val = entry->eax & (1UL << 18); - TEST_ASSERT(!!test_val == !smt_possible(), + TEST_ASSERT(!!test_val == !is_smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); + + TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)), + "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC"); break; case 0x4000000A: TEST_ASSERT(entry->eax & (1UL << 19), @@ -111,9 +103,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, * entry->edx); */ } + + /* + * Note, the CPUID array returned by the system-scoped helper is a one- + * time allocation, i.e. must not be freed. + */ + if (vcpu) + free((void *)hv_cpuid_entries); } -void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; @@ -131,19 +130,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int main(int argc, char *argv[]) { struct kvm_vm *vm; - const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + /* Test the vCPU ioctl without an in-kernel local APIC. */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_hv_cpuid(vcpu, false); + kvm_vm_free(vm); /* Test vCPU ioctl version */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); test_hv_cpuid_e2big(vm, vcpu); - - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, false); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, false); if (!kvm_cpu_has(X86_FEATURE_VMX) || !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { @@ -151,9 +151,7 @@ int main(int argc, char *argv[]) goto do_sys; } vcpu_enable_evmcs(vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, true); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, true); do_sys: /* Test system ioctl version */ @@ -163,9 +161,7 @@ do_sys: } test_hv_cpuid_e2big(vm, NULL); - - hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); + test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX)); out: kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 7bde0c4dfdbd..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -4,7 +4,6 @@ * * Tests for Enlightened VMCS, including nested guest state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -240,11 +239,12 @@ int main(int argc, char *argv[]) struct ucall uc; int stage; - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); hcall_page = vm_vaddr_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); @@ -257,8 +257,6 @@ int main(int argc, char *argv[]) vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 73af44d2167f..949e08e98f31 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -8,7 +8,6 @@ * Copyright 2022 Google LLC * Author: Vipin Sharma <vipinsh@google.com> */ - #include "kvm_util.h" #include "processor.h" #include "hyperv.h" @@ -44,6 +43,8 @@ int main(void) uint64_t *outval; struct ucall uc; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); + /* Verify if extended hypercalls are supported */ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(), HV_ENABLE_EXTENDED_HYPERCALLS)) { @@ -84,7 +85,7 @@ int main(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: break; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 78606de9385d..130b9ce7e5dd 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -53,16 +53,21 @@ static void guest_msr(struct msr_data *msr) vector = rdmsr_safe(msr->idx, &msr_val); if (msr->fault_expected) - GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); else - GUEST_ASSERT_3(!vector, msr->idx, vector, 0); + __GUEST_ASSERT(!vector, + "Expected success on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); if (vector || is_write_only_msr(msr->idx)) goto done; if (msr->write) - GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx, - msr_val, msr->write_val); + __GUEST_ASSERT(!vector, + "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'", + msr->idx, msr->write_val, msr_val); /* Invariant TSC bit appears when TSC invariant control MSR is written to */ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) { @@ -82,24 +87,28 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) u64 res, input, output; uint8_t vector; - GUEST_ASSERT(hcall->control); + GUEST_ASSERT_NE(hcall->control, 0); wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { input = pgs_gpa; - output = pgs_gpa + 4096; + output = pgs_gpa + PAGE_SIZE; } else { input = output = 0; } vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { - GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); + __GUEST_ASSERT(vector == UD_VECTOR, + "Expected #UD for control '%lu', got %s", + hcall->control, ex_str(vector)); } else { - GUEST_ASSERT_2(!vector, hcall->control, vector); - GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res); + __GUEST_ASSERT(!vector, + "Expected no exception for control '%lu', got %s", + hcall->control, ex_str(vector)); + GUEST_ASSERT_EQ(res, hcall->expect); } GUEST_DONE(); @@ -147,9 +156,6 @@ static void guest_test_msrs_access(void) vcpu_init_cpuid(vcpu, prev_cpuid); } - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - /* TODO: Make this entire test easier to maintain. */ if (stage >= 21) vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0); @@ -445,7 +451,7 @@ static void guest_test_msrs_access(void) case 44: /* MSR is not available when CPUID feature bit is unset */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; msr->fault_expected = true; @@ -453,7 +459,7 @@ static void guest_test_msrs_access(void) case 45: /* MSR is vailable when CPUID feature bit is set */ if (!has_invtsc) - continue; + goto next_stage; vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; @@ -462,7 +468,7 @@ static void guest_test_msrs_access(void) case 46: /* Writing bits other than 0 is forbidden */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 0xdeadbeef; @@ -471,7 +477,7 @@ static void guest_test_msrs_access(void) case 47: /* Setting bit 0 enables the feature */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 1; @@ -495,7 +501,7 @@ static void guest_test_msrs_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: break; @@ -504,6 +510,7 @@ static void guest_test_msrs_access(void) return; } +next_stage: stage++; kvm_vm_free(vm); } @@ -522,9 +529,6 @@ static void guest_test_hcalls_access(void) while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); @@ -665,7 +669,7 @@ static void guest_test_hcalls_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: break; @@ -681,6 +685,8 @@ static void guest_test_hcalls_access(void) int main(void) { + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID)); + pr_info("Testing access to Hyper-V specific MSRs\n"); guest_test_msrs_access(); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 6feb5ddb031d..ca61836c4e32 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -5,8 +5,6 @@ * Copyright (C) 2022, Red Hat, Inc. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <pthread.h> #include <inttypes.h> @@ -65,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* Signal sender vCPU we're ready */ ipis_rcvd[vcpu_id] = (u64)-1; - for (;;) - asm volatile("sti; hlt; cli"); + for (;;) { + safe_halt(); + cli(); + } } static void guest_ipi_handler(struct ex_regs *regs) @@ -102,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ ipi->vector = IPI_VECTOR; ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; - hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -116,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 0; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -138,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -160,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -183,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -248,22 +248,21 @@ int main(int argc, char *argv[]) int stage = 1, r; struct ucall uc; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI)); + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - vm_init_descriptor_tables(vm); vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); - vcpu_init_descriptor_tables(vcpu[1]); vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); vcpu_set_hv_cpuid(vcpu[1]); vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); - vcpu_init_descriptor_tables(vcpu[2]); vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); vcpu_set_hv_cpuid(vcpu[2]); @@ -287,7 +286,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index e446d76d1c0c..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -1,12 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * KVM_GET/SET_* tests - * * Copyright (C) 2022, Red Hat, Inc. * * Tests for Hyper-V extensions to SVM. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -160,6 +157,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 4758b6ef5618..a3b7ce155981 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -5,8 +5,6 @@ * Copyright (C) 2022, Red Hat, Inc. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <asm/barrier.h> #include <pthread.h> #include <inttypes.h> @@ -590,6 +588,8 @@ int main(int argc, char *argv[]) struct ucall uc; int stage = 1, r, i; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH)); + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Test data page */ @@ -621,7 +621,7 @@ int main(int argc, char *argv[]) for (i = 0; i < NTEST_PAGES; i++) { pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); - __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); } @@ -656,7 +656,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_ABORT: diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c new file mode 100644 index 000000000000..d88500c118eb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + */ +#include <linux/atomic.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "vmx.h" +#include "test_util.h" + +#define NR_BUS_LOCKS_PER_LEVEL 100 +#define CACHE_LINE_SIZE 64 + +/* + * To generate a bus lock, carve out a buffer that precisely occupies two cache + * lines and perform an atomic access that splits the two lines. + */ +static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE); +static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)]; + +static void guest_generate_buslocks(void) +{ + for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++) + atomic_inc(val); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_generate_buslocks(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *test_data) +{ + guest_generate_buslocks(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + int i, bus_locks = 0; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT)); + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + run = vcpu->run; + + for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) { + struct ucall uc; + + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_IO) { + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto done; + case UCALL_SYNC: + continue; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK); + + /* + * Verify the counter is actually getting incremented, e.g. that + * KVM isn't skipping the instruction. On Intel, the exit is + * trap-like, i.e. the counter should already have been + * incremented. On AMD, it's fault-like, i.e. the counter will + * be incremented when the guest re-executes the instruction. + */ + sync_global_from_guest(vm, *val); + TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel); + + bus_locks++; + } + TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks); +done: + TEST_ASSERT_EQ(i, bus_locks); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 1778704360a6..5bc12222d87a 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -92,7 +92,7 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) break; } while (errno == EINTR); - TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r); + TEST_ASSERT(!r, "clock_gettime() failed: %d", r); data.realtime = ts.tv_sec * NSEC_PER_SEC; data.realtime += ts.tv_nsec; @@ -127,47 +127,11 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd); + TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd); } } } -#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" - -static void check_clocksource(void) -{ - char *clk_name; - struct stat st; - FILE *fp; - - fp = fopen(CLOCKSOURCE_PATH, "r"); - if (!fp) { - pr_info("failed to open clocksource file: %d; assuming TSC.\n", - errno); - return; - } - - if (fstat(fileno(fp), &st)) { - pr_info("failed to stat clocksource file: %d; assuming TSC.\n", - errno); - goto out; - } - - clk_name = malloc(st.st_size); - TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n"); - - if (!fgets(clk_name, st.st_size, fp)) { - pr_info("failed to read clocksource file: %d; assuming TSC.\n", - ferror(fp)); - goto out; - } - - TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size), - "clocksource not supported: %s", clk_name); -out: - fclose(fp); -} - int main(void) { struct kvm_vcpu *vcpu; @@ -179,7 +143,7 @@ int main(void) flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - check_clocksource(); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index f774a9e62858..1b805cbdb47b 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -46,10 +46,10 @@ static void test_msr(struct msr_data *msr) PR_MSR(msr); vector = rdmsr_safe(msr->idx, &ignored); - GUEST_ASSERT_1(vector == GP_VECTOR, vector); + GUEST_ASSERT_EQ(vector, GP_VECTOR); vector = wrmsr_safe(msr->idx, 0); - GUEST_ASSERT_1(vector == GP_VECTOR, vector); + GUEST_ASSERT_EQ(vector, GP_VECTOR); } struct hcall_data { @@ -77,7 +77,7 @@ static void test_hcall(struct hcall_data *hc) PR_HCALL(hc); r = kvm_hypercall(hc->nr, 0, 0, 0, 0); - GUEST_ASSERT(r == -KVM_ENOSYS); + GUEST_ASSERT_EQ(r, -KVM_ENOSYS); } static void guest_main(void) @@ -125,7 +125,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) pr_hcall(&uc); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_1(uc, "vector = %lu"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: return; @@ -133,6 +133,71 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } +static void test_pv_unhalt(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_cpuid_entry2 *ent; + u32 kvm_sig_old; + int r; + + if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) + return; + + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + + /* KVM_PV_UNHALT test */ + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); + + /* Verify KVM disallows disabling exits after vCPU creation. */ + r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + TEST_ASSERT(r && errno == EINVAL, + "Disabling exits after vCPU creation didn't fail as expected"); + + kvm_vm_free(vm); + + /* Verify that KVM clear PV_UNHALT from guest CPUID. */ + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "vCPU created with PV_UNHALT set by default"); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* + * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT + * when KVM PV is not present, and DOES clear PV_UNHALT when switching + * back to the correct signature.. + */ + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + kvm_sig_old = ent->ebx; + ent->ebx = 0xdeadbeef; + vcpu_set_cpuid(vcpu); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT cleared when using bogus KVM PV signature"); + + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); + ent->ebx = kvm_sig_old; + vcpu_set_cpuid(vcpu); + + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ + + kvm_vm_free(vm); +} + int main(void) { struct kvm_vcpu *vcpu; @@ -146,9 +211,8 @@ int main(void) vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - enter_guest(vcpu); kvm_vm_free(vm); + + test_pv_unhalt(); } diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c index 3cc4b86832fe..7e2bfb3c3f3b 100644 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c @@ -26,19 +26,37 @@ int main(int argc, char *argv[]) TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); + /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */ + if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID); + + /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail"); + } + /* Set KVM_CAP_MAX_VCPU_ID */ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); - /* Try to set KVM_CAP_MAX_VCPU_ID again */ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); - /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/ + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); + /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32)); + TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail"); + + /* Create vCPU with id within bounds */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0); + TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed"); + + close(ret); kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c new file mode 100644 index 000000000000..e45c028d2a7e --- /dev/null +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "kvm_util.h" +#include "processor.h" +#include "kselftest.h" + +#define CPUID_MWAIT (1u << 3) + +enum monitor_mwait_testcases { + MWAIT_QUIRK_DISABLED = BIT(0), + MISC_ENABLES_QUIRK_DISABLED = BIT(1), + MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, +}; + +/* + * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all + * other scenarios KVM should emulate them as nops. + */ +#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \ +do { \ + bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \ + ((testcase) & MWAIT_DISABLED); \ + \ + if (fault_wanted) \ + __GUEST_ASSERT((vector) == UD_VECTOR, \ + "Expected #UD on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ + else \ + __GUEST_ASSERT(!(vector), \ + "Expected success on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ +} while (0) + +static void guest_monitor_wait(void *arg) +{ + int testcase = (int) (long) arg; + u8 vector; + + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? "cleared" : "set"); + + /* + * Arbitrarily MONITOR this function, SVM performs fault checks before + * intercept checks, so the inputs for MONITOR and MWAIT must be valid. + */ + vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); + GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector); + + vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); + GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + uint64_t disabled_quirks; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int testcase; + char test[80]; + + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); + + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + /* Detected in vcpu_run */ + break; + case UCALL_DONE: + ksft_test_result_pass("%s\n", test); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } + kvm_vm_free(vm); + } + ksft_finished(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c new file mode 100644 index 000000000000..40d918aedce6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/msrs_test.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <asm/msr-index.h> + +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" + +/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */ +#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR + +struct kvm_msr { + const struct kvm_x86_cpu_feature feature; + const struct kvm_x86_cpu_feature feature2; + const char *name; + const u64 reset_val; + const u64 write_val; + const u64 rsvd_val; + const u32 index; + const bool is_kvm_defined; +}; + +#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \ +{ \ + .index = msr, \ + .name = str, \ + .write_val = val, \ + .rsvd_val = rsvd, \ + .reset_val = reset, \ + .feature = X86_FEATURE_ ##feat, \ + .feature2 = X86_FEATURE_ ##f2, \ + .is_kvm_defined = is_kvm, \ +} + +#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \ + ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false) + +#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, reset, feat) + +#define MSR_TEST(msr, val, rsvd, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, 0, feat) + +#define MSR_TEST2(msr, val, rsvd, feat, f2) \ + ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false) + +/* + * Note, use a page aligned value for the canonical value so that the value + * is compatible with MSRs that use bits 11:0 for things other than addresses. + */ +static const u64 canonical_val = 0x123456789000ull; + +/* + * Arbitrary value with bits set in every byte, but not all bits set. This is + * also a non-canonical value, but that's coincidental (any 64-bit value with + * an alternating 0s/1s pattern will be non-canonical). + */ +static const u64 u64_val = 0xaaaa5555aaaa5555ull; + +#define MSR_TEST_CANONICAL(msr, feat) \ + __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat) + +#define MSR_TEST_KVM(msr, val, rsvd, feat) \ + ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true) + +/* + * The main struct must be scoped to a function due to the use of structures to + * define features. For the global structure, allocate enough space for the + * foreseeable future without getting too ridiculous, to minimize maintenance + * costs (bumping the array size every time an MSR is added is really annoying). + */ +static struct kvm_msr msrs[128]; +static int idx; + +static bool ignore_unsupported_msrs; + +static u64 fixup_rdmsr_val(u32 msr, u64 want) +{ + /* + * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM + * is supposed to emulate that behavior based on guest vendor model + * (which is the same as the host vendor model for this test). + */ + if (!host_cpu_is_amd) + return want; + + switch (msr) { + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: + case MSR_TSC_AUX: + return want & GENMASK_ULL(31, 0); + default: + return want; + } +} + +static void __rdmsr(u32 msr, u64 want) +{ + u64 val; + u8 vec; + + vec = rdmsr_safe(msr, &val); + __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr); + + __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx", + want, msr, val); +} + +static void __wrmsr(u32 msr, u64 val) +{ + u8 vec; + + vec = wrmsr_safe(msr, val); + __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)", + ex_str(vec), msr, val); + __rdmsr(msr, fixup_rdmsr_val(msr, val)); +} + +static void guest_test_supported_msr(const struct kvm_msr *msr) +{ + __rdmsr(msr->index, msr->reset_val); + __wrmsr(msr->index, msr->write_val); + GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val)); + + __rdmsr(msr->index, msr->reset_val); +} + +static void guest_test_unsupported_msr(const struct kvm_msr *msr) +{ + u64 val; + u8 vec; + + /* + * KVM's ABI with respect to ignore_msrs is a mess and largely beyond + * repair, just skip the unsupported MSR tests. + */ + if (ignore_unsupported_msrs) + goto skip_wrmsr_gp; + + /* + * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are + * writable only if their associated feature is supported. Skip the + * RDMSR #GP test if the secondary feature is supported, but perform + * the WRMSR #GP test as the to-be-written value is tied to the primary + * feature. For all other MSRs, simply do nothing. + */ + if (this_cpu_has(msr->feature2)) { + if (msr->index != MSR_IA32_U_CET && + msr->index != MSR_IA32_S_CET) + goto skip_wrmsr_gp; + + goto skip_rdmsr_gp; + } + + vec = rdmsr_safe(msr->index, &val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s", + msr->index, ex_str(vec)); + +skip_rdmsr_gp: + vec = wrmsr_safe(msr->index, msr->write_val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->write_val, ex_str(vec)); + +skip_wrmsr_gp: + GUEST_SYNC(0); +} + +void guest_test_reserved_val(const struct kvm_msr *msr) +{ + /* Skip reserved value checks as well, ignore_msrs is trully a mess. */ + if (ignore_unsupported_msrs) + return; + + /* + * If the CPU will truncate the written value (e.g. SYSENTER on AMD), + * expect success and a truncated value, not #GP. + */ + if (!this_cpu_has(msr->feature) || + msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) { + u8 vec = wrmsr_safe(msr->index, msr->rsvd_val); + + __GUEST_ASSERT(vec == GP_VECTOR, + "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->rsvd_val, ex_str(vec)); + } else { + __wrmsr(msr->index, msr->rsvd_val); + __wrmsr(msr->index, msr->reset_val); + } +} + +static void guest_main(void) +{ + for (;;) { + const struct kvm_msr *msr = &msrs[READ_ONCE(idx)]; + + if (this_cpu_has(msr->feature)) + guest_test_supported_msr(msr); + else + guest_test_unsupported_msr(msr); + + if (msr->rsvd_val) + guest_test_reserved_val(msr); + + GUEST_SYNC(msr->reset_val); + } +} + +static bool has_one_reg; +static bool use_one_reg; + +#define KVM_X86_MAX_NR_REGS 1 + +static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg) +{ + struct { + struct kvm_reg_list list; + u64 regs[KVM_X86_MAX_NR_REGS]; + } regs = {}; + int r, i; + + /* + * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported + * regs, then the vCPU obviously doesn't support the reg. + */ + r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®s.list); + if (!r) + return false; + + TEST_ASSERT_EQ(errno, E2BIG); + + /* + * KVM x86 is expected to support enumerating a relative small number + * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG + * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For + * simplicity, hardcode the maximum number of regs and manually update + * the test as necessary. + */ + TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS, + "KVM reports %llu regs, test expects at most %u regs, stale test?", + regs.list.n, KVM_X86_MAX_NR_REGS); + + vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®s.list); + for (i = 0; i < regs.list.n; i++) { + if (regs.regs[i] == reg) + return true; + } + + return false; +} + +static void host_test_kvm_reg(struct kvm_vcpu *vcpu) +{ + bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature); + u64 reset_val = msrs[idx].reset_val; + u64 write_val = msrs[idx].write_val; + u64 rsvd_val = msrs[idx].rsvd_val; + u32 reg = msrs[idx].index; + u64 val; + int r; + + if (!use_one_reg) + return; + + TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg); + + if (!has_reg) { + r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val); + TEST_ASSERT(r && errno == EINVAL, + "Expected failure on get_reg(0x%x)", reg); + rsvd_val = 0; + goto out; + } + + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, reg, val); + + vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val); + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + write_val, reg, val); + +out: + r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val); + TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val); +} + +static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val) +{ + u64 reset_val = msrs[idx].reset_val; + u32 msr = msrs[idx].index; + u64 val; + + if (!kvm_cpu_has(msrs[idx].feature)) + return; + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + guest_val, msr, val); + + if (use_one_reg) + vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val); + else + vcpu_set_msr(vcpu, msr, reset_val); + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + reset_val, msr, val); + + if (!has_one_reg) + return; + + val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, msr, val); +} + +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + for (;;) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + host_test_msr(vcpu, uc.args[1]); + return; + case UCALL_PRINTF: + pr_info("%s", uc.buffer); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_DONE: + TEST_FAIL("Unexpected UCALL_DONE"); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS) +{ + int i; + + for (i = 0; i < NR_VCPUS; i++) + do_vcpu_run(vcpus[i]); +} + +#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL) + +static void test_msrs(void) +{ + const struct kvm_msr __msrs[] = { + MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE, + MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING, + MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE), + MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE), + + /* + * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add + * entries for each features so that TSC_AUX doesn't exists for + * the "unsupported" vCPU, and obviously to test both cases. + */ + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID), + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP), + + MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE), + /* + * SYSENTER_{ESP,EIP} are technically non-canonical on Intel, + * but KVM doesn't emulate that behavior on emulated writes, + * i.e. this test will observe different behavior if the MSR + * writes are handed by hardware vs. KVM. KVM's behavior is + * intended (though far from ideal), so don't bother testing + * non-canonical values. + */ + MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE), + MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE), + + MSR_TEST_CANONICAL(MSR_FS_BASE, LM), + MSR_TEST_CANONICAL(MSR_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_LSTAR, LM), + MSR_TEST_CANONICAL(MSR_CSTAR, LM), + MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM), + + MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK), + + MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK), + }; + + const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE; + const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM; + + /* + * Create three vCPUs, but run them on the same task, to validate KVM's + * context switching of MSR state. Don't pin the task to a pCPU to + * also validate KVM's handling of cross-pCPU migration. Use the full + * set of features for the first two vCPUs, but clear all features in + * third vCPU in order to test both positive and negative paths. + */ + const int NR_VCPUS = 3; + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct kvm_vm *vm; + int i; + + kvm_static_assert(sizeof(__msrs) <= sizeof(msrs)); + kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs)); + memcpy(msrs, __msrs, sizeof(__msrs)); + + ignore_unsupported_msrs = kvm_is_ignore_msrs(); + + vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus); + + sync_global_to_guest(vm, msrs); + sync_global_to_guest(vm, ignore_unsupported_msrs); + + /* + * Clear features in the "unsupported features" vCPU. This needs to be + * done before the first vCPU run as KVM's ABI is that guest CPUID is + * immutable once the vCPU has been run. + */ + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + /* + * Don't clear LM; selftests are 64-bit only, and KVM doesn't + * honor LM=0 for MSRs that are supposed to exist if and only + * if the vCPU is a 64-bit model. Ditto for NONE; clearing a + * fake feature flag will result in false failures. + */ + if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) && + memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none))) + vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature); + } + + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + struct kvm_msr *msr = &msrs[idx]; + + if (msr->is_kvm_defined) { + for (i = 0; i < NR_VCPUS; i++) + host_test_kvm_reg(vcpus[i]); + continue; + } + + /* + * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST + * are consistent with respect to MSRs whose existence is + * enumerated via CPUID. Skip the check for FS/GS.base MSRs, + * as they aren't reported in the save/restore list since their + * state is managed via SREGS. + */ + TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE || + kvm_msr_is_in_save_restore_list(msr->index) == + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)), + "%s %s in save/restore list, but %s according to CPUID", msr->name, + kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't", + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ? + "supported" : "unsupported"); + + sync_global_to_guest(vm, idx); + + vcpus_run(vcpus, NR_VCPUS); + vcpus_run(vcpus, NR_VCPUS); + } + + kvm_vm_free(vm); +} + +int main(void) +{ + has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG); + + test_msrs(); + + if (has_one_reg) { + use_one_reg = true; + test_msrs(); + } +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c index dad988351493..f001cb836bfa 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_close_while_nested - * * Copyright (C) 2019, Red Hat, Inc. * * Verify that nothing bad happens if a KVM user exits with open @@ -12,6 +10,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -22,6 +21,8 @@ enum { PORT_L0_EXIT = 0x2000, }; +#define L2_GUEST_STACK_SIZE 64 + static void l2_guest_code(void) { /* Exit to L0 */ @@ -29,9 +30,8 @@ static void l2_guest_code(void) : : [port] "d" (PORT_L0_EXIT) : "rax"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_vmx_code(struct vmx_pages *vmx_pages) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); @@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(0); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Prepare the VMCB for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(0); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); for (;;) { volatile struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c new file mode 100644 index 000000000000..abc824dba04f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +enum { + SVM_F, + VMX_F, + NR_VIRTUALIZATION_FLAVORS, +}; + +struct emulated_instruction { + const char name[32]; + uint8_t opcode[15]; + uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS]; +}; + +static struct emulated_instruction instructions[] = { + { + .name = "pause", + .opcode = { 0xf3, 0x90 }, + .exit_reason = { SVM_EXIT_PAUSE, + EXIT_REASON_PAUSE_INSTRUCTION, } + }, + { + .name = "hlt", + .opcode = { 0xf4 }, + .exit_reason = { SVM_EXIT_HLT, + EXIT_REASON_HLT, } + }, +}; + +static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ +static uint8_t l2_guest_code[sizeof(kvm_fep) + 15]; +static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; + +static uint32_t get_instruction_length(struct emulated_instruction *insn) +{ + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++) + ; + + return i; +} + +static void guest_code(void *test_data) +{ + int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F; + int i; + + memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep)); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, NULL, NULL); + vmcb->save.idtr.limit = 0; + vmcb->save.rip = (u64)l2_guest_code; + + vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) | + BIT_ULL(INTERCEPT_PAUSE) | + BIT_ULL(INTERCEPT_HLT); + vmcb->control.intercept_exceptions = 0; + } else { + GUEST_ASSERT(prepare_for_vmx_operation(test_data)); + GUEST_ASSERT(load_vmcs(test_data)); + + prepare_vmcs(test_data, NULL, NULL); + GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0)); + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0)); + + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_PAUSE_EXITING | + CPU_BASED_HLT_EXITING); + } + + for (i = 0; i < ARRAY_SIZE(instructions); i++) { + struct emulated_instruction *insn = &instructions[i]; + uint32_t insn_len = get_instruction_length(insn); + uint32_t exit_insn_len; + u32 exit_reason; + + /* + * Copy the target instruction to the L2 code stream, and fill + * the remaining bytes with INT3s so that a missed intercept + * results in a consistent failure mode (SHUTDOWN). + */ + memcpy(l2_instruction, insn->opcode, insn_len); + memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + run_guest(vmcb, svm->vmcb_gpa); + exit_reason = vmcb->control.exit_code; + exit_insn_len = vmcb->control.next_rip - vmcb->save.rip; + GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction); + } else { + GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0); + exit_reason = vmreadz(VM_EXIT_REASON); + exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN); + GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction); + } + + __GUEST_ASSERT(exit_reason == insn->exit_reason[f], + "Wanted exit_reason '0x%x' for '%s', got '0x%x'", + insn->exit_reason[f], insn->name, exit_reason); + + __GUEST_ASSERT(exit_insn_len == insn_len, + "Wanted insn_len '%u' for '%s', got '%u'", + insn_len, insn->name, exit_insn_len); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 6502aa23c2f8..3641a42934ac 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "test_util.h" #include "kvm_util.h" #include "processor.h" @@ -87,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); + GUEST_ASSERT(!ctrl->int_state); } static void l1_svm_code(struct svm_test_data *svm) @@ -124,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); + GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO)); } static void l1_vmx_code(struct vmx_pages *vmx) @@ -180,9 +180,7 @@ static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector) "Expected L2 to ask for %d, L2 says it's done", vector); break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)", - (const char *)uc.args[0], __FILE__, uc.args[1], - uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd); @@ -247,12 +245,12 @@ int main(int argc, char *argv[]) /* Verify the pending events comes back out the same as it went in. */ vcpu_events_get(vcpu, &events); - ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, - KVM_VCPUEVENT_VALID_PAYLOAD); - ASSERT_EQ(events.exception.pending, true); - ASSERT_EQ(events.exception.nr, SS_VECTOR); - ASSERT_EQ(events.exception.has_error_code, true); - ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); + TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, + KVM_VCPUEVENT_VALID_PAYLOAD); + TEST_ASSERT_EQ(events.exception.pending, true); + TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR); + TEST_ASSERT_EQ(events.exception.has_error_code, true); + TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); /* * Run for real with the pending #SS, L1 should get a VM-Exit due to diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c new file mode 100644 index 000000000000..a6b6da9cf7fe --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * This test verifies that L1 fails to enter L2 with an invalid CR3, and + * succeeds otherwise. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" + + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + vmcall(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = svm->vmcb->save.cr3; + svm->vmcb->save.cr3 = -1ull; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR); + + /* Now restore CR3 and make sure L2 runs successfully */ + svm->vmcb->save.cr3 = save_cr3; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + + /* Now restore CR3 and make sure L2 runs successfully */ + vmwrite(GUEST_CR3, save_cr3); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t guest_gva = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index 2ceb5c78c442..2839f650e5c9 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_tsc_adjust_test - * * Copyright (C) 2018, Google LLC. * * IA32_TSC_ADJUST test @@ -22,6 +20,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -35,6 +34,8 @@ #define TSC_ADJUST_VALUE (1ll << 32) #define TSC_OFFSET_VALUE -(1ll << 48) +#define L2_GUEST_STACK_SIZE 64 + enum { PORT_ABORT = 0x1000, PORT_REPORT, @@ -72,42 +73,47 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_guest_code(void *data) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; + /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - - /* Jump into L2. First, test failure to load guest CR3. */ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + /* + * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not + * intercepting the write so it should update L1's TSC_ADJUST. + */ + if (this_cpu_has(X86_FEATURE_VMX)) { + struct vmx_pages *vmx_pages = data; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + } else { + struct svm_test_data *svm = data; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + } check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - GUEST_DONE(); } @@ -119,16 +125,19 @@ static void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t nested_gva; struct kvm_vcpu *vcpu; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (;;) { struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index e710b6e7fb38..4260c9e4f489 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" /* L2 is scaled up (from L1's perspective) by this factor */ @@ -79,7 +80,30 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC scaling for L2 */ + wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32); + + /* launch L2 */ + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint32_t control; @@ -116,28 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -static bool system_has_stable_tsc(void) +static void l1_guest_code(void *data) { - bool tsc_is_stable; - FILE *fp; - char buf[4]; - - fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); - if (fp == NULL) - return false; - - tsc_is_stable = fgets(buf, sizeof(buf), fp) && - !strncmp(buf, "tsc", sizeof(buf)); - - fclose(fp); - return tsc_is_stable; + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); } int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva = 0; uint64_t tsc_start, tsc_end; uint64_t tsc_khz; @@ -146,9 +161,10 @@ int main(int argc, char *argv[]) uint64_t l1_tsc_freq = 0; uint64_t l2_tsc_freq = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(system_has_stable_tsc()); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. @@ -169,8 +185,13 @@ int main(int argc, char *argv[]) printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index 7f36c32fa760..c0d84827f736 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -1,15 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/nx_huge_page_test.c - * * Usage: to be run via nx_huge_page_test.sh, which does the necessary * environment setup and teardown * * Copyright (C) 2022, Google LLC. */ - -#define _GNU_SOURCE - #include <fcntl.h> #include <stdint.h> #include <time.h> @@ -78,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) { int actual_pages_2m; - actual_pages_2m = vm_get_stat(vm, "pages_2m"); + actual_pages_2m = vm_get_stat(vm, pages_2m); TEST_ASSERT(actual_pages_2m == expected_pages_2m, "Unexpected 2m page count. Expected %d, got %d", @@ -89,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) { int actual_splits; - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + actual_splits = vm_get_stat(vm, nx_lpage_splits); TEST_ASSERT(actual_splits == expected_splits, "Unexpected NX huge page split count. Expected %d, got %d", @@ -259,9 +254,9 @@ int main(int argc, char **argv) TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); __TEST_REQUIRE(token == MAGIC_TOKEN, - "This test must be run with the magic token %d.\n" - "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test."); + "This test must be run with the magic token via '-t %d'.\n" + "Running via nx_huge_pages_test.sh, which also handles " + "environment setup, is strongly recommended.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh index 0560149e66ed..caad084b8bfd 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh @@ -4,7 +4,6 @@ # Wrapper script which performs setup and cleanup for nx_huge_pages_test. # Makes use of root privileges to set up huge pages and KVM module parameters. # -# tools/testing/selftests/kvm/nx_huge_page_test.sh # Copyright (C) 2022, Google LLC. set -e @@ -14,10 +13,21 @@ NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_reco NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) +# If we're already root, the host might not have sudo. +if [ $(whoami) == "root" ]; then + function do_sudo () { + "$@" + } +else + function do_sudo () { + sudo "$@" + } +fi + set +e function sudo_echo () { - echo "$1" | sudo tee -a "$2" > /dev/null + echo "$1" | do_sudo tee -a "$2" > /dev/null } NXECUTABLE="$(dirname $0)/nx_huge_pages_test" diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c new file mode 100644 index 000000000000..9cbf283ebc55 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_CAP_MSR_PLATFORM_INFO + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of controlling guest access to + * MSR_PLATFORM_INFO. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 + +static void guest_code(void) +{ + uint64_t msr_platform_info; + uint8_t vector; + + GUEST_SYNC(true); + msr_platform_info = rdmsr(MSR_PLATFORM_INFO); + GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO, + MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + + GUEST_SYNC(false); + vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info); + GUEST_ASSERT_EQ(vector, GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t msr_platform_info; + struct ucall uc; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); + vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, + msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + + for (;;) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall %lu", uc.cmd); + break; + } + } + +done: + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c new file mode 100644 index 000000000000..3eaa216b96c0 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -0,0 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023, Tencent, Inc. + */ +#include <x86intrin.h> + +#include "pmu.h" +#include "processor.h" + +/* Number of iterations of the loop for the guest measurement payload. */ +#define NUM_LOOPS 10 + +/* Each iteration of the loop retires one branch instruction. */ +#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) + +/* + * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP, + * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP. + */ +#define NUM_INSNS_PER_LOOP 6 + +/* + * Number of "extra" instructions that will be counted, i.e. the number of + * instructions that are needed to set up the loop and then disable the + * counter. 2 MOV, 2 XOR, 1 WRMSR. + */ +#define NUM_EXTRA_INSNS 5 + +/* Total number of instructions retired within the measured section. */ +#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) + +/* Track which architectural events are supported by hardware. */ +static uint32_t hardware_pmu_arch_events; + +static uint8_t kvm_pmu_version; +static bool kvm_has_perf_caps; + +#define X86_PMU_FEATURE_NULL \ +({ \ + struct kvm_x86_pmu_feature feature = {}; \ + \ + feature; \ +}) + +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) +{ + return !(*(u64 *)&event); +} + +struct kvm_intel_pmu_event { + struct kvm_x86_pmu_feature gp_event; + struct kvm_x86_pmu_feature fixed_event; +}; + +/* + * Wrap the array to appease the compiler, as the macros used to construct each + * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the + * compiler often thinks the feature definitions aren't compile-time constants. + */ +static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) +{ + const struct kvm_intel_pmu_event __intel_event_to_feature[] = { + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, + /* + * Note, the fixed counter for reference cycles is NOT the same as the + * general purpose architectural event. The fixed counter explicitly + * counts at the same frequency as the TSC, whereas the GP event counts + * at a fixed, but uarch specific, frequency. Bundle them here for + * simplicity. + */ + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL }, + }; + + kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); + + return __intel_event_to_feature[idx]; +} + +static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + void *guest_code, + uint8_t pmu_version, + uint64_t perf_capabilities) +{ + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + sync_global_to_guest(vm, kvm_pmu_version); + sync_global_to_guest(vm, hardware_pmu_arch_events); + + /* + * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling + * features via PERF_CAPABILITIES if the guest doesn't have a vPMU. + */ + if (kvm_has_perf_caps) + vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities); + + vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version); + return vm; +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + pr_info("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static uint8_t guest_get_pmu_version(void) +{ + /* + * Return the effective PMU version, i.e. the minimum between what KVM + * supports and what is enumerated to the guest. The host deliberately + * advertises a PMU version to the guest beyond what is actually + * supported by KVM to verify KVM doesn't freak out and do something + * bizarre with an architecturally valid, but unsupported, version. + */ + return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); +} + +/* + * If an architectural event is supported and guaranteed to generate at least + * one "hit, assert that its count is non-zero. If an event isn't supported or + * the test can't guarantee the associated action will occur, then all bets are + * off regarding the count, i.e. no checks can be done. + * + * Sanity check that in all cases, the event doesn't count when it's disabled, + * and that KVM correctly emulates the write of an arbitrary value. + */ +static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) +{ + uint64_t count; + + count = _rdpmc(pmc); + if (!(hardware_pmu_arch_events & BIT(idx))) + goto sanity_checks; + + switch (idx) { + case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); + break; + case INTEL_ARCH_BRANCHES_RETIRED_INDEX: + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); + break; + case INTEL_ARCH_LLC_REFERENCES_INDEX: + case INTEL_ARCH_LLC_MISSES_INDEX: + if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) && + !this_cpu_has(X86_FEATURE_CLFLUSH)) + break; + fallthrough; + case INTEL_ARCH_CPU_CYCLES_INDEX: + case INTEL_ARCH_REFERENCE_CYCLES_INDEX: + case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX: + case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX: + GUEST_ASSERT_NE(count, 0); + break; + case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: + case INTEL_ARCH_TOPDOWN_RETIRING_INDEX: + __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, + "Expected top-down slots >= %u, got count = %lu", + NUM_INSNS_RETIRED, count); + break; + default: + break; + } + +sanity_checks: + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); + GUEST_ASSERT_EQ(_rdpmc(pmc), count); + + wrmsr(pmc_msr, 0xdead); + GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead); +} + +/* + * Enable and disable the PMC in a monolithic asm blob to ensure that the + * compiler can't insert _any_ code into the measured sequence. Note, ECX + * doesn't need to be clobbered as the input value, @pmc_msr, is restored + * before the end of the sequence. + * + * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the + * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and + * misses, i.e. to allow testing that those events actually count. + * + * If forced emulation is enabled (and specified), force emulation on a subset + * of the measured code to verify that KVM correctly emulates instructions and + * branches retired events in conjunction with hardware also counting said + * events. + */ +#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ +do { \ + __asm__ __volatile__("wrmsr\n\t" \ + " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ + "1:\n\t" \ + FEP "enter $0, $0\n\t" \ + clflush "\n\t" \ + "mfence\n\t" \ + "mov %[m], %%eax\n\t" \ + FEP "leave\n\t" \ + FEP "loop 1b\n\t" \ + FEP "mov %%edi, %%ecx\n\t" \ + FEP "xor %%eax, %%eax\n\t" \ + FEP "xor %%edx, %%edx\n\t" \ + "wrmsr\n\t" \ + :: "a"((uint32_t)_value), "d"(_value >> 32), \ + "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ + ); \ +} while (0) + +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ +do { \ + wrmsr(_pmc_msr, 0); \ + \ + if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \ + else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \ + else \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ + \ + guest_assert_event_count(_idx, _pmc, _pmc_msr); \ +} while (0) + +static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, + uint32_t ctrl_msr, uint64_t ctrl_msr_value) +{ + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); + + if (is_forced_emulation_enabled) + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); +} + +static void guest_test_arch_event(uint8_t idx) +{ + uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + uint32_t pmu_version = guest_get_pmu_version(); + /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ + bool guest_has_perf_global_ctrl = pmu_version >= 2; + struct kvm_x86_pmu_feature gp_event, fixed_event; + uint32_t base_pmc_msr; + unsigned int i; + + /* The host side shouldn't invoke this without a guest PMU. */ + GUEST_ASSERT(pmu_version); + + if (this_cpu_has(X86_FEATURE_PDCM) && + rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) + base_pmc_msr = MSR_IA32_PMC0; + else + base_pmc_msr = MSR_IA32_PERFCTR0; + + gp_event = intel_event_to_feature(idx).gp_event; + GUEST_ASSERT_EQ(idx, gp_event.f.bit); + + GUEST_ASSERT(nr_gp_counters); + + for (i = 0; i < nr_gp_counters; i++) { + uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS | + ARCH_PERFMON_EVENTSEL_ENABLE | + intel_pmu_arch_events[idx]; + + wrmsr(MSR_P6_EVNTSEL0 + i, 0); + if (guest_has_perf_global_ctrl) + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); + + __guest_test_arch_event(idx, i, base_pmc_msr + i, + MSR_P6_EVNTSEL0 + i, eventsel); + } + + if (!guest_has_perf_global_ctrl) + return; + + fixed_event = intel_event_to_feature(idx).fixed_event; + if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) + return; + + i = fixed_event.f.bit; + + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); + + __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED, + MSR_CORE_PERF_FIXED_CTR0 + i, + MSR_CORE_PERF_GLOBAL_CTRL, + FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); +} + +static void guest_test_arch_events(void) +{ + uint8_t i; + + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) + guest_test_arch_event(i); + + GUEST_DONE(); +} + +static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, + uint8_t length, uint32_t unavailable_mask) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Testing arch events requires a vPMU (there are no negative tests). */ + if (!pmu_version) + return; + + unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit, + X86_PROPERTY_PMU_EVENTS_MASK.lo_bit); + + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, + pmu_version, perf_capabilities); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH, + length); + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK, + unavailable_mask); + + run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +/* + * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs + * that aren't defined counter MSRs *probably* don't exist, but there's no + * guarantee that currently undefined MSR indices won't be used for something + * other than PMCs in the future. + */ +#define MAX_NR_GP_COUNTERS 8 +#define MAX_NR_FIXED_COUNTERS 3 + +#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ +__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ + "Expected %s on " #insn "(0x%x), got %s", \ + expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \ + +#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ + __GUEST_ASSERT(val == expected, \ + "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ + msr, expected, val); + +static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, + uint64_t expected_val) +{ + uint8_t vector; + uint64_t val; + + vector = rdpmc_safe(rdpmc_idx, &val); + GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); + if (expect_success) + GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); + + if (!is_forced_emulation_enabled) + return; + + vector = rdpmc_safe_fep(rdpmc_idx, &val); + GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); + if (expect_success) + GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); +} + +static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, + uint8_t nr_counters, uint32_t or_mask) +{ + const bool pmu_has_fast_mode = !guest_get_pmu_version(); + uint8_t i; + + for (i = 0; i < nr_possible_counters; i++) { + /* + * TODO: Test a value that validates full-width writes and the + * width of the counters. + */ + const uint64_t test_val = 0xffff; + const uint32_t msr = base_msr + i; + + /* + * Fixed counters are supported if the counter is less than the + * number of enumerated contiguous counters *or* the counter is + * explicitly enumerated in the supported counters mask. + */ + const bool expect_success = i < nr_counters || (or_mask & BIT(i)); + + /* + * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are + * unsupported, i.e. doesn't #GP and reads back '0'. + */ + const uint64_t expected_val = expect_success ? test_val : 0; + const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && + msr != MSR_P6_PERFCTR1; + uint32_t rdpmc_idx; + uint8_t vector; + uint64_t val; + + vector = wrmsr_safe(msr, test_val); + GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); + + vector = rdmsr_safe(msr, &val); + GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector); + + /* On #GP, the result of RDMSR is undefined. */ + if (!expect_gp) + GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val); + + /* + * Redo the read tests with RDPMC, which has different indexing + * semantics and additional capabilities. + */ + rdpmc_idx = i; + if (base_msr == MSR_CORE_PERF_FIXED_CTR0) + rdpmc_idx |= INTEL_RDPMC_FIXED; + + guest_test_rdpmc(rdpmc_idx, expect_success, expected_val); + + /* + * KVM doesn't support non-architectural PMUs, i.e. it should + * impossible to have fast mode RDPMC. Verify that attempting + * to use fast RDPMC always #GPs. + */ + GUEST_ASSERT(!expect_success || !pmu_has_fast_mode); + rdpmc_idx |= INTEL_RDPMC_FAST; + guest_test_rdpmc(rdpmc_idx, false, -1ull); + + vector = wrmsr_safe(msr, 0); + GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); + } +} + +static void guest_test_gp_counters(void) +{ + uint8_t pmu_version = guest_get_pmu_version(); + uint8_t nr_gp_counters = 0; + uint32_t base_msr; + + if (pmu_version) + nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + + /* + * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is + * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number + * of GP counters. If there are no GP counters, require KVM to leave + * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but + * follow the spirit of the architecture and only globally enable GP + * counters, of which there are none. + */ + if (pmu_version > 1) { + uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); + + if (nr_gp_counters) + GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); + else + GUEST_ASSERT_EQ(global_ctrl, 0); + } + + if (this_cpu_has(X86_FEATURE_PDCM) && + rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES) + base_msr = MSR_IA32_PMC0; + else + base_msr = MSR_IA32_PERFCTR0; + + guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0); + GUEST_DONE(); +} + +static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, + uint8_t nr_gp_counters) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters, + pmu_version, perf_capabilities); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS, + nr_gp_counters); + + run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +static void guest_test_fixed_counters(void) +{ + uint64_t supported_bitmask = 0; + uint8_t nr_fixed_counters = 0; + uint8_t i; + + /* Fixed counters require Architectural vPMU Version 2+. */ + if (guest_get_pmu_version() >= 2) + nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + + /* + * The supported bitmask for fixed counters was introduced in PMU + * version 5. + */ + if (guest_get_pmu_version() >= 5) + supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK); + + guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS, + nr_fixed_counters, supported_bitmask); + + for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { + uint8_t vector; + uint64_t val; + + if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { + vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL, + FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for counter %u in FIXED_CTR_CTRL", i); + + vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL, + FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i); + continue; + } + + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); + + GUEST_ASSERT_NE(val, 0); + } + GUEST_DONE(); +} + +static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, + uint8_t nr_fixed_counters, + uint32_t supported_bitmask) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters, + pmu_version, perf_capabilities); + + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK, + supported_bitmask); + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS, + nr_fixed_counters); + + run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +static void test_intel_counters(void) +{ + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); + unsigned int i; + uint8_t v, j; + uint32_t k; + + const uint64_t perf_caps[] = { + 0, + PMU_CAP_FW_WRITES, + }; + + /* + * To keep the total runtime reasonable, test only a handful of select, + * semi-arbitrary values for the mask of unavailable PMU events. Test + * 0 (all events available) and all ones (no events available) as well + * as alternating bit sequencues, e.g. to detect if KVM is checking the + * wrong bit(s). + */ + const uint32_t unavailable_masks[] = { + 0x0, + 0xffffffffu, + 0xaaaaaaaau, + 0x55555555u, + 0xf0f0f0f0u, + 0x0f0f0f0fu, + 0xa0a0a0a0u, + 0x0a0a0a0au, + 0x50505050u, + 0x05050505u, + }; + + /* + * Test up to PMU v5, which is the current maximum version defined by + * Intel, i.e. is the last version that is guaranteed to be backwards + * compatible with KVM's existing behavior. + */ + uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); + + /* + * Detect the existence of events that aren't supported by selftests. + * This will (obviously) fail any time hardware adds support for a new + * event, but it's worth paying that price to keep the test fresh. + */ + TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS, + "New architectural event(s) detected; please update this test (length = %u, mask = %x)", + this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH), + this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); + + /* + * Iterate over known arch events irrespective of KVM/hardware support + * to verify that KVM doesn't reject programming of events just because + * the *architectural* encoding is unsupported. Track which events are + * supported in hardware; the guest side will validate supported events + * count correctly, even if *enumeration* of the event is unsupported + * by KVM and/or isn't exposed to the guest. + */ + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) { + if (this_pmu_has(intel_event_to_feature(i).gp_event)) + hardware_pmu_arch_events |= BIT(i); + } + + for (v = 0; v <= max_pmu_version; v++) { + for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { + if (!kvm_has_perf_caps && perf_caps[i]) + continue; + + pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", + v, perf_caps[i]); + + /* + * Test single bits for all PMU version and lengths up + * the number of events +1 (to verify KVM doesn't do + * weird things if the guest length is greater than the + * host length). Explicitly test a mask of '0' and all + * ones i.e. all events being available and unavailable. + */ + for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { + for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++) + test_arch_events(v, perf_caps[i], j, unavailable_masks[k]); + } + + pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", + v, perf_caps[i]); + for (j = 0; j <= nr_gp_counters; j++) + test_gp_counters(v, perf_caps[i], j); + + pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n", + v, perf_caps[i]); + for (j = 0; j <= nr_fixed_counters; j++) { + for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++) + test_fixed_counters(v, perf_caps[i], j, k); + } + } + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_is_pmu_enabled()); + + TEST_REQUIRE(host_cpu_is_intel); + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); + + kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); + kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM); + + test_intel_counters(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index 40507ed9fe8a..1c5b7611db24 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -9,81 +9,47 @@ * Verifies the expected behavior of allow lists and deny lists for * virtual PMU events. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" - -/* - * In lieu of copying perf_event.h into tools... - */ -#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) -#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) - -/* End of stuff taken from perf_event.h. */ - -/* Oddly, this isn't in perf_event.h. */ -#define ARCH_PERFMON_BRANCHES_RETIRED 5 +#include "test_util.h" #define NUM_BRANCHES 42 +#define MAX_TEST_EVENTS 10 -/* - * This is how the event selector and unit mask are stored in an AMD - * core performance event-select register. Intel's format is similar, - * but the event selector is only 8 bits. - */ -#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \ - (umask & 0xff) << 8) - -/* - * "Branch instructions retired", from the Intel SDM, volume 3, - * "Pre-defined Architectural Performance Events." - */ - -#define INTEL_BR_RETIRED EVENT(0xc4, 0) - -/* - * "Retired branch instructions", from Processor Programming Reference - * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, - * Preliminary Processor Programming Reference (PPR) for AMD Family - * 17h Model 31h, Revision B0 Processors, and Preliminary Processor - * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision - * B1 Processors Volume 1 of 2. - */ - -#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) - - -/* - * "Retired instructions", from Processor Programming Reference - * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, - * Preliminary Processor Programming Reference (PPR) for AMD Family - * 17h Model 31h, Revision B0 Processors, and Preliminary Processor - * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision - * B1 Processors Volume 1 of 2. - * --- and --- - * "Instructions retired", from the Intel SDM, volume 3, - * "Pre-defined Architectural Performance Events." - */ - -#define INST_RETIRED EVENT(0xc0, 0) +#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1) +#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1) +#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1) + +struct __kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; +}; /* - * This event list comprises Intel's eight architectural events plus - * AMD's "retired branch instructions" for Zen[123] (and possibly - * other AMD CPUs). + * This event list comprises Intel's known architectural events, plus AMD's + * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the + * same encoding for Instructions Retired. */ -static const uint64_t event_list[] = { - EVENT(0x3c, 0), - INST_RETIRED, - EVENT(0x3c, 1), - EVENT(0x2e, 0x4f), - EVENT(0x2e, 0x41), - EVENT(0xc4, 0), - EVENT(0xc5, 0), - EVENT(0xa4, 1), - AMD_ZEN_BR_RETIRED, +kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); + +static const struct __kvm_pmu_event_filter base_event_filter = { + .nevents = ARRAY_SIZE(base_event_filter.events), + .events = { + INTEL_ARCH_CPU_CYCLES, + INTEL_ARCH_INSTRUCTIONS_RETIRED, + INTEL_ARCH_REFERENCE_CYCLES, + INTEL_ARCH_LLC_REFERENCES, + INTEL_ARCH_LLC_MISSES, + INTEL_ARCH_BRANCHES_RETIRED, + INTEL_ARCH_BRANCHES_MISPREDICTED, + INTEL_ARCH_TOPDOWN_SLOTS, + AMD_ZEN_BRANCHES_RETIRED, + }, }; struct { @@ -144,9 +110,9 @@ static void intel_guest_code(void) for (;;) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); + ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED); wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED); + ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); run_and_measure_loop(MSR_IA32_PMC0); @@ -168,9 +134,9 @@ static void amd_guest_code(void) for (;;) { wrmsr(MSR_K7_EVNTSEL0, 0); wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); + ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED); wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | - ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED); + ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED); run_and_measure_loop(MSR_K7_PERFCTR0); GUEST_SYNC(0); @@ -225,48 +191,11 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) return !r; } -static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) -{ - struct kvm_pmu_event_filter *f; - int size = sizeof(*f) + nevents * sizeof(f->events[0]); - - f = malloc(size); - TEST_ASSERT(f, "Out of memory"); - memset(f, 0, size); - f->nevents = nevents; - return f; -} - - -static struct kvm_pmu_event_filter * -create_pmu_event_filter(const uint64_t event_list[], int nevents, - uint32_t action, uint32_t flags) -{ - struct kvm_pmu_event_filter *f; - int i; - - f = alloc_pmu_event_filter(nevents); - f->action = action; - f->flags = flags; - for (i = 0; i < nevents; i++) - f->events[i] = event_list[i]; - - return f; -} - -static struct kvm_pmu_event_filter *event_filter(uint32_t action) -{ - return create_pmu_event_filter(event_list, - ARRAY_SIZE(event_list), - action, 0); -} - /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. */ -static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, - uint64_t event) +static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) { bool found = false; int i; @@ -279,15 +208,16 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, } if (found) f->nevents--; - return f; } #define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ do { \ uint64_t br = pmc_results.branches_retired; \ uint64_t ir = pmc_results.instructions_retired; \ + bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \ + br >= NUM_BRANCHES : br == NUM_BRANCHES; \ \ - if (br && br != NUM_BRANCHES) \ + if (br && !br_matched) \ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ __func__, br, NUM_BRANCHES); \ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ @@ -315,66 +245,73 @@ static void test_without_filter(struct kvm_vcpu *vcpu) } static void test_with_filter(struct kvm_vcpu *vcpu, - struct kvm_pmu_event_filter *f) + struct __kvm_pmu_event_filter *__f) { + struct kvm_pmu_event_filter *f = (void *)__f; + vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); run_vcpu_and_sync_pmc_results(vcpu); } static void test_amd_deny_list(struct kvm_vcpu *vcpu) { - uint64_t event = EVENT(0x1C2, 0); - struct kvm_pmu_event_filter *f; + struct __kvm_pmu_event_filter f = { + .action = KVM_PMU_EVENT_DENY, + .nevents = 1, + .events = { + RAW_EVENT(0x1C2, 0), + }, + }; - f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0); - test_with_filter(vcpu, f); - free(f); + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_member_deny_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + struct __kvm_pmu_event_filter f = base_event_filter; - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_DENY; + test_with_filter(vcpu, &f); ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } static void test_member_allow_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + struct __kvm_pmu_event_filter f = base_event_filter; - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_ALLOW; + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_deny_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + struct __kvm_pmu_event_filter f = base_event_filter; - remove_event(f, INST_RETIRED); - remove_event(f, INTEL_BR_RETIRED); - remove_event(f, AMD_ZEN_BR_RETIRED); - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_DENY; + + remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED); + remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED); + remove_event(&f, AMD_ZEN_BRANCHES_RETIRED); + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_allow_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_ALLOW; - remove_event(f, INST_RETIRED); - remove_event(f, INTEL_BR_RETIRED); - remove_event(f, AMD_ZEN_BR_RETIRED); - test_with_filter(vcpu, f); - free(f); + remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED); + remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED); + remove_event(&f, AMD_ZEN_BRANCHES_RETIRED); + test_with_filter(vcpu, &f); ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } @@ -399,9 +336,6 @@ static void test_pmu_config_disable(void (*guest_code)(void)) vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE); vcpu = vm_vcpu_add(vm, 0, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - TEST_ASSERT(!sanity_check_pmu(vcpu), "Guest should not be able to use disabled PMU."); @@ -421,38 +355,13 @@ static bool use_intel_pmu(void) kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t family, uint32_t model) -{ - return family == 0x17 && model <= 0x0f; -} - -static bool is_zen2(uint32_t family, uint32_t model) -{ - return family == 0x17 && model >= 0x30 && model <= 0x3f; -} - -static bool is_zen3(uint32_t family, uint32_t model) -{ - return family == 0x19 && model <= 0x0f; -} - /* - * Determining AMD support for a PMU event requires consulting the AMD - * PPR for the CPU or reference material derived therefrom. The AMD - * test code herein has been verified to work on Zen1, Zen2, and Zen3. - * - * Feel free to add more AMD CPUs that are documented to support event - * select 0xc2 umask 0 as "retired branch instructions." + * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding + * 0xc2,0 for Branch Instructions Retired. */ static bool use_amd_pmu(void) { - uint32_t family = kvm_cpu_family(); - uint32_t model = kvm_cpu_model(); - - return host_cpu_is_amd && - (is_zen1(family, model) || - is_zen2(family, model) || - is_zen3(family, model)); + return host_cpu_is_amd && kvm_cpu_family() >= 0x17; } /* @@ -462,9 +371,9 @@ static bool use_amd_pmu(void) * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake. */ #define MEM_INST_RETIRED 0xD0 -#define MEM_INST_RETIRED_LOAD EVENT(MEM_INST_RETIRED, 0x81) -#define MEM_INST_RETIRED_STORE EVENT(MEM_INST_RETIRED, 0x82) -#define MEM_INST_RETIRED_LOAD_STORE EVENT(MEM_INST_RETIRED, 0x83) +#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81) +#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82) +#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83) static bool supports_event_mem_inst_retired(void) { @@ -496,9 +405,9 @@ static bool supports_event_mem_inst_retired(void) * B1 Processors Volume 1 of 2. */ #define LS_DISPATCH 0x29 -#define LS_DISPATCH_LOAD EVENT(LS_DISPATCH, BIT(0)) -#define LS_DISPATCH_STORE EVENT(LS_DISPATCH, BIT(1)) -#define LS_DISPATCH_LOAD_STORE EVENT(LS_DISPATCH, BIT(2)) +#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0)) +#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1)) +#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2)) #define INCLUDE_MASKED_ENTRY(event_select, mask, match) \ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false) @@ -569,19 +478,16 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, const uint64_t masked_events[], const int nmasked_events) { - struct kvm_pmu_event_filter *f; + struct __kvm_pmu_event_filter f = { + .nevents = nmasked_events, + .action = KVM_PMU_EVENT_ALLOW, + .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + }; - f = create_pmu_event_filter(masked_events, nmasked_events, - KVM_PMU_EVENT_ALLOW, - KVM_PMU_EVENT_FLAG_MASKED_EVENTS); - test_with_filter(vcpu, f); - free(f); + memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); + test_with_filter(vcpu, &f); } -/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */ -#define MAX_FILTER_EVENTS 300 -#define MAX_TEST_EVENTS 10 - #define ALLOW_LOADS BIT(0) #define ALLOW_STORES BIT(1) #define ALLOW_LOADS_STORES BIT(2) @@ -742,32 +648,44 @@ static void add_dummy_events(uint64_t *events, int nevents) static void test_masked_events(struct kvm_vcpu *vcpu) { - int nevents = MAX_FILTER_EVENTS - MAX_TEST_EVENTS; - uint64_t events[MAX_FILTER_EVENTS]; + int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS; + uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; /* Run the test cases against a sparse PMU event filter. */ run_masked_events_tests(vcpu, events, 0); /* Run the test cases against a dense PMU event filter. */ - add_dummy_events(events, MAX_FILTER_EVENTS); + add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS); run_masked_events_tests(vcpu, events, nevents); } -static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events, - int nevents, uint32_t flags) +static int set_pmu_event_filter(struct kvm_vcpu *vcpu, + struct __kvm_pmu_event_filter *__f) { - struct kvm_pmu_event_filter *f; - int r; + struct kvm_pmu_event_filter *f = (void *)__f; + + return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); +} - f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags); - r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); - free(f); +static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, + uint32_t flags, uint32_t action) +{ + struct __kvm_pmu_event_filter f = { + .nevents = 1, + .flags = flags, + .action = action, + .events = { + event, + }, + }; - return r; + return set_pmu_event_filter(vcpu, &f); } static void test_filter_ioctl(struct kvm_vcpu *vcpu) { + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct __kvm_pmu_event_filter f; uint64_t e = ~0ul; int r; @@ -775,15 +693,143 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu) * Unfortunately having invalid bits set in event data is expected to * pass when flags == 0 (bits other than eventsel+umask). */ - r = run_filter_test(vcpu, &e, 1, 0); + r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); - r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail"); e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf); - r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); + + f = base_event_filter; + f.action = PMU_EVENT_FILTER_INVALID_ACTION; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid action is expected to fail"); + + f = base_event_filter; + f.flags = PMU_EVENT_FILTER_INVALID_FLAGS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid flags is expected to fail"); + + f = base_event_filter; + f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Exceeding the max number of filter events should fail"); + + f = base_event_filter; + f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0); + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); +} + +static void intel_run_fixed_counter_guest_code(uint8_t idx) +{ + for (;;) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0); + + /* Only OS_EN bit is enabled for fixed counter[idx]. */ + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL)); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx)); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx)); + } +} + +static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, + uint32_t action, uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = { + .action = action, + .fixed_counter_bitmap = bitmap, + }; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, + uint32_t action, + uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = action; + f.fixed_counter_bitmap = bitmap; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, + uint8_t nr_fixed_counters) +{ + unsigned int i; + uint32_t bitmap; + uint64_t count; + + TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, + "Invalid nr_fixed_counters"); + + /* + * Check the fixed performance counter can count normally when KVM + * userspace doesn't set any pmu filter. + */ + count = run_vcpu_to_sync(vcpu); + TEST_ASSERT(count, "Unexpected count value: %ld", count); + + for (i = 0; i < BIT(nr_fixed_counters); i++) { + bitmap = BIT(i); + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + + /* + * Check that fixed_counter_bitmap has higher priority than + * events[] when both are set. + */ + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + } +} + +static void test_fixed_counter_bitmap(void) +{ + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint8_t idx; + + /* + * Check that pmu_event_filter works as expected when it's applied to + * fixed performance counters. + */ + for (idx = 0; idx < nr_fixed_counters; idx++) { + vm = vm_create_with_one_vcpu(&vcpu, + intel_run_fixed_counter_guest_code); + vcpu_args_set(vcpu, 1, idx); + __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters); + kvm_vm_free(vm); + } } int main(int argc, char *argv[]) @@ -792,7 +838,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu, *vcpu2 = NULL; struct kvm_vm *vm; - TEST_REQUIRE(get_kvm_param_bool("enable_pmu")); + TEST_REQUIRE(kvm_is_pmu_enabled()); TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS)); @@ -801,9 +847,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - TEST_REQUIRE(sanity_check_pmu(vcpu)); if (use_amd_pmu()) @@ -829,6 +872,7 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_pmu_config_disable(guest_code); + test_fixed_counter_bitmap(); return 0; } diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c new file mode 100644 index 000000000000..1969f4ab9b28 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + */ +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/kvm_para.h> +#include <linux/memfd.h> +#include <linux/sizes.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +#define BASE_DATA_SLOT 10 +#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) +#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) + +/* Horrific macro so that the line info is captured accurately :-( */ +#define memcmp_g(gpa, pattern, size) \ +do { \ + uint8_t *mem = (uint8_t *)gpa; \ + size_t i; \ + \ + for (i = 0; i < size; i++) \ + __GUEST_ASSERT(mem[i] == pattern, \ + "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \ + pattern, i, gpa + i, mem[i]); \ +} while (0) + +static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + TEST_ASSERT(mem[i] == pattern, + "Host expected 0x%x at gpa 0x%lx, got 0x%x", + pattern, gpa + i, mem[i]); +} + +/* + * Run memory conversion tests with explicit conversion: + * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit + * to back/unback private memory. Subsequent accesses by guest to the gpa range + * will not cause exit to userspace. + * + * Test memory conversion scenarios with following steps: + * 1) Access private memory using private access and verify that memory contents + * are not visible to userspace. + * 2) Convert memory to shared using explicit conversions and ensure that + * userspace is able to access the shared regions. + * 3) Convert memory back to private using explicit conversions and ensure that + * userspace is again not able to access converted private regions. + */ + +#define GUEST_STAGE(o, s) { .offset = o, .size = s } + +enum ucall_syncs { + SYNC_SHARED, + SYNC_PRIVATE, +}; + +static void guest_sync_shared(uint64_t gpa, uint64_t size, + uint8_t current_pattern, uint8_t new_pattern) +{ + GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); +} + +static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) +{ + GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); +} + +/* Arbitrary values, KVM doesn't care about the attribute flags. */ +#define MAP_GPA_SET_ATTRIBUTES BIT(0) +#define MAP_GPA_SHARED BIT(1) +#define MAP_GPA_DO_FALLOCATE BIT(2) + +static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, + bool do_fallocate) +{ + uint64_t flags = MAP_GPA_SET_ATTRIBUTES; + + if (map_shared) + flags |= MAP_GPA_SHARED; + if (do_fallocate) + flags |= MAP_GPA_DO_FALLOCATE; + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, true, do_fallocate); +} + +static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, false, do_fallocate); +} + +struct { + uint64_t offset; + uint64_t size; +} static const test_ranges[] = { + GUEST_STAGE(0, PAGE_SIZE), + GUEST_STAGE(0, SZ_2M), + GUEST_STAGE(PAGE_SIZE, PAGE_SIZE), + GUEST_STAGE(PAGE_SIZE, SZ_2M), + GUEST_STAGE(SZ_2M, PAGE_SIZE), +}; + +static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) +{ + const uint8_t def_p = 0xaa; + const uint8_t init_p = 0xcc; + uint64_t j; + int i; + + /* Memory should be shared by default. */ + memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE); + guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p); + + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + uint8_t p1 = 0x11; + uint8_t p2 = 0x22; + uint8_t p3 = 0x33; + uint8_t p4 = 0x44; + + /* + * Set the test region to pattern one to differentiate it from + * the data range as a whole (contains the initial pattern). + */ + memset((void *)gpa, p1, size); + + /* + * Convert to private, set and verify the private data, and + * then verify that the rest of the data (map shared) still + * holds the initial pattern, and that the host always sees the + * shared memory (initial pattern). Unlike shared memory, + * punching a hole in private memory is destructive, i.e. + * previous values aren't guaranteed to be preserved. + */ + guest_map_private(gpa, size, do_fallocate); + + if (size > PAGE_SIZE) { + memset((void *)gpa, p2, PAGE_SIZE); + goto skip; + } + + memset((void *)gpa, p2, size); + guest_sync_private(gpa, size, p1); + + /* + * Verify that the private memory was set to pattern two, and + * that shared memory still holds the initial pattern. + */ + memcmp_g(gpa, p2, size); + if (gpa > base_gpa) + memcmp_g(base_gpa, init_p, gpa - base_gpa); + if (gpa + size < base_gpa + PER_CPU_DATA_SIZE) + memcmp_g(gpa + size, init_p, + (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size)); + + /* + * Convert odd-number page frames back to shared to verify KVM + * also correctly handles holes in private ranges. + */ + for (j = 0; j < size; j += PAGE_SIZE) { + if ((j >> PAGE_SHIFT) & 1) { + guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate); + guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3); + + memcmp_g(gpa + j, p3, PAGE_SIZE); + } else { + guest_sync_private(gpa + j, PAGE_SIZE, p1); + } + } + +skip: + /* + * Convert the entire region back to shared, explicitly write + * pattern three to fill in the even-number frames before + * asking the host to verify (and write pattern four). + */ + guest_map_shared(gpa, size, do_fallocate); + memset((void *)gpa, p3, size); + guest_sync_shared(gpa, size, p3, p4); + memcmp_g(gpa, p4, size); + + /* Reset the shared memory back to the initial pattern. */ + memset((void *)gpa, init_p, size); + + /* + * Free (via PUNCH_HOLE) *all* private memory so that the next + * iteration starts from a clean slate, e.g. with respect to + * whether or not there are pages/folios in guest_mem. + */ + guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true); + } +} + +static void guest_punch_hole(uint64_t gpa, uint64_t size) +{ + /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ + uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; + + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +/* + * Test that PUNCH_HOLE actually frees memory by punching holes without doing a + * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating + * (subsequent fault) should zero memory. + */ +static void guest_test_punch_hole(uint64_t base_gpa, bool precise) +{ + const uint8_t init_p = 0xcc; + int i; + + /* + * Convert the entire range to private, this testcase is all about + * punching holes in guest_memfd, i.e. shared mappings aren't needed. + */ + guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + + /* + * Free all memory before each iteration, even for the !precise + * case where the memory will be faulted back in. Freeing and + * reallocating should obviously work, and freeing all memory + * minimizes the probability of cross-testcase influence. + */ + guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE); + + /* Fault-in and initialize memory, and verify the pattern. */ + if (precise) { + memset((void *)gpa, init_p, size); + memcmp_g(gpa, init_p, size); + } else { + memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + } + + /* + * Punch a hole at the target range and verify that reads from + * the guest succeed and return zeroes. + */ + guest_punch_hole(gpa, size); + memcmp_g(gpa, 0, size); + } +} + +static void guest_code(uint64_t base_gpa) +{ + /* + * Run the conversion test twice, with and without doing fallocate() on + * the guest_memfd backing when converting between shared and private. + */ + guest_test_explicit_conversion(base_gpa, false); + guest_test_explicit_conversion(base_gpa, true); + + /* + * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd + * faulted in, once with only the target range faulted in. + */ + guest_test_punch_hole(base_gpa, false); + guest_test_punch_hole(base_gpa, true); + GUEST_DONE(); +} + +static void handle_exit_hypercall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint64_t gpa = run->hypercall.args[0]; + uint64_t size = run->hypercall.args[1] * PAGE_SIZE; + bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; + bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; + bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; + struct kvm_vm *vm = vcpu->vm; + + TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE, + "Wanted MAP_GPA_RANGE (%u), got '%llu'", + KVM_HC_MAP_GPA_RANGE, run->hypercall.nr); + + if (do_fallocate) + vm_guest_mem_fallocate(vm, gpa, size, map_shared); + + if (set_attributes) + vm_set_memory_attributes(vm, gpa, size, + map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE); + run->hypercall.ret = 0; +} + +static bool run_vcpus; + +static void *__test_mem_conversions(void *__vcpu) +{ + struct kvm_vcpu *vcpu = __vcpu; + struct kvm_run *run = vcpu->run; + struct kvm_vm *vm = vcpu->vm; + struct ucall uc; + + while (!READ_ONCE(run_vcpus)) + ; + + for ( ;; ) { + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_HYPERCALL) { + handle_exit_hypercall(vcpu); + continue; + } + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: { + uint64_t gpa = uc.args[1]; + size_t size = uc.args[2]; + size_t i; + + TEST_ASSERT(uc.args[0] == SYNC_SHARED || + uc.args[0] == SYNC_PRIVATE, + "Unknown sync command '%ld'", uc.args[0]); + + for (i = 0; i < size; i += vm->page_size) { + size_t nr_bytes = min_t(size_t, vm->page_size, size - i); + uint8_t *hva = addr_gpa2hva(vm, gpa + i); + + /* In all cases, the host should observe the shared data. */ + memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); + + /* For shared, write the new pattern to guest memory. */ + if (uc.args[0] == SYNC_SHARED) + memset(hva, uc.args[4], nr_bytes); + } + break; + } + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +} + +static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, + uint32_t nr_memslots) +{ + /* + * Allocate enough memory so that each vCPU's chunk of memory can be + * naturally aligned with respect to the size of the backing store. + */ + const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type)); + const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment); + const size_t memfd_size = per_cpu_size * nr_vcpus; + const size_t slot_size = memfd_size / nr_memslots; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + pthread_t threads[KVM_MAX_VCPUS]; + struct kvm_vm *vm; + int memfd, i; + + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, + }; + + TEST_ASSERT(slot_size * nr_memslots == memfd_size, + "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)", + memfd_size, nr_memslots); + vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus); + + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE)); + + memfd = vm_create_guest_memfd(vm, memfd_size, 0); + + for (i = 0; i < nr_memslots; i++) + vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i, + BASE_DATA_SLOT + i, slot_size / vm->page_size, + KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); + + for (i = 0; i < nr_vcpus; i++) { + uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; + + vcpu_args_set(vcpus[i], 1, gpa); + + /* + * Map only what is needed so that an out-of-bounds access + * results #PF => SHUTDOWN instead of data corruption. + */ + virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size); + + pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]); + } + + WRITE_ONCE(run_vcpus, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(threads[i], NULL); + + kvm_vm_free(vm); + + /* + * Allocate and free memory from the guest_memfd after closing the VM + * fd. The guest_memfd is gifted a reference to its owning VM, i.e. + * should prevent the VM from being fully destroyed until the last + * reference to the guest_memfd is also put. + */ + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); + + close(memfd); +} + +static void usage(const char *cmd) +{ + puts(""); + printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd); + puts(""); + backing_src_help("-s"); + puts(""); + puts(" -n: specify the number of vcpus (default: 1)"); + puts(""); + puts(" -m: specify the number of memslots (default: 1)"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; + uint32_t nr_memslots = 1; + uint32_t nr_vcpus = 1; + int opt; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) { + switch (opt) { + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'n': + nr_vcpus = atoi_positive("nr_vcpus", optarg); + break; + case 'm': + nr_memslots = atoi_positive("nr_memslots", optarg); + break; + case 'h': + default: + usage(argv[0]); + exit(0); + } + } + + test_mem_conversions(src_type, nr_vcpus, nr_memslots); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c new file mode 100644 index 000000000000..13e72fcec8dd --- /dev/null +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Google LLC. + */ +#include <linux/kvm.h> +#include <pthread.h> +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +/* Arbitrarily selected to avoid overlaps with anything else */ +#define EXITS_TEST_GVA 0xc0000000 +#define EXITS_TEST_GPA EXITS_TEST_GVA +#define EXITS_TEST_NPAGES 1 +#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) +#define EXITS_TEST_SLOT 10 + +static uint64_t guest_repeatedly_read(void) +{ + volatile uint64_t value; + + while (true) + value = *((uint64_t *) EXITS_TEST_GVA); + + return value; +} + +static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) +{ + int r; + + r = _vcpu_run(vcpu); + if (r) { + TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r)); + TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT); + } + return vcpu->run->exit_reason; +} + +const struct vm_shape protected_vm_shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, +}; + +static void test_private_access_memslot_deleted(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + pthread_t vm_thread; + void *thread_return; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, + KVM_MEM_GUEST_MEMFD); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + pthread_create(&vm_thread, NULL, + (void *(*)(void *))run_vcpu_get_exit_reason, + (void *)vcpu); + + vm_mem_region_delete(vm, EXITS_TEST_SLOT); + + pthread_join(vm_thread, &thread_return); + exit_reason = (uint32_t)(uint64_t)thread_return; + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +static void test_private_access_memslot_not_private(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + /* Add a non-private memslot (flags = 0) */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, 0); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + exit_reason = run_vcpu_get_exit_reason(vcpu); + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + test_private_access_memslot_deleted(); + test_private_access_memslot_not_private(); +} diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c index 4c416ebe7d66..cbc92a862ea9 100644 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c @@ -57,7 +57,7 @@ int main(void) for (i = 0; i < KVM_MAX_VCPUS; i++) vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); - ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); + TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); vcpuN = vcpus[KVM_MAX_VCPUS - 1]; for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { @@ -65,8 +65,8 @@ int main(void) vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); } - ASSERT_EQ(pthread_cancel(thread), 0); - ASSERT_EQ(pthread_join(thread, NULL), 0); + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c index b25d7556b638..49913784bc82 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c @@ -4,7 +4,6 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -20,7 +19,7 @@ static void guest_bsp_vcpu(void *arg) { GUEST_SYNC(1); - GUEST_ASSERT(get_bsp_flag() != 0); + GUEST_ASSERT_NE(get_bsp_flag(), 0); GUEST_DONE(); } @@ -29,11 +28,25 @@ static void guest_not_bsp_vcpu(void *arg) { GUEST_SYNC(1); - GUEST_ASSERT(get_bsp_flag() == 0); + GUEST_ASSERT_EQ(get_bsp_flag(), 0); GUEST_DONE(); } +static void test_set_invalid_bsp(struct kvm_vm *vm) +{ + unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + int r; + + if (max_vcpu_id) { + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail"); + } + + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail"); +} + static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) { int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, @@ -65,7 +78,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu) stage); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); @@ -81,6 +94,8 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, vm = vm_create(nr_vcpus); + test_set_invalid_bsp(vm); + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index 3610981d9162..f4095a3d1278 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -10,7 +10,6 @@ * That bug allowed a user-mode program that called the KVM_SET_SREGS * ioctl to put a VCPU's local APIC into an invalid state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -42,13 +41,15 @@ do { \ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ } while (0) +#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \ + X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \ + X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ + X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) + static uint64_t calc_supported_cr4_feature_bits(void) { - uint64_t cr4; + uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | - X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | - X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; if (kvm_cpu_has(X86_FEATURE_LA57)) @@ -73,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -int main(int argc, char *argv[]) +static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) { struct kvm_sregs sregs; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t cr4; int rc, i; - /* - * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and - * use it to verify all supported CR4 bits can be set prior to defining - * the vCPU model, i.e. without doing KVM_SET_CPUID2. - */ - vm = vm_create_barebones(); - vcpu = __vm_vcpu_add(vm, 0); - vcpu_sregs_get(vcpu, &sregs); - - sregs.cr0 = 0; - sregs.cr4 |= calc_supported_cr4_feature_bits(); - cr4 = sregs.cr4; - + sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + sregs.cr4 |= cr4; rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)), + "KVM didn't %s OSXSAVE in CPUID as expected", + (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear"); + + TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) == + (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)), + "KVM didn't %s OSPKE in CPUID as expected", + (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear"); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); - /* Verify all unsupported features are rejected by KVM. */ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); @@ -120,10 +116,28 @@ int main(int argc, char *argv[]) /* NW without CD is illegal, as is PG without PE. */ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); +} + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + /* + * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and + * use it to verify KVM enforces guest CPUID even if *userspace* never + * sets CPUID. + */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4); kvm_vm_free(vm); - /* Create a "real" VM and verify APIC_BASE can be set. */ + /* Create a "real" VM with a fully populated guest CPUID and verify + * APIC_BASE and all supported CR4 can be set. + */ vm = vm_create_with_one_vcpu(&vcpu, NULL); vcpu_sregs_get(vcpu, &sregs); @@ -136,6 +150,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", sregs.apic_base); + test_cr_bits(vcpu, calc_supported_cr4_feature_bits()); + kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c new file mode 100644 index 000000000000..b238615196ad --- /dev/null +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kvm.h> +#include <linux/psp-sev.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <stdlib.h> +#include <errno.h> +#include <pthread.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "kselftest.h" + +#define SVM_SEV_FEAT_DEBUG_SWAP 32u + +/* + * Some features may have hidden dependencies, or may only work + * for certain VM types. Err on the side of safety and don't + * expect that all supported features can be passed one by one + * to KVM_SEV_INIT2. + * + * (Well, right now there's only one...) + */ +#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP + +int kvm_fd; +u64 supported_vmsa_features; +bool have_sev_es; +bool have_snp; + +static int __sev_ioctl(int vm_fd, int cmd_id, void *data) +{ + struct kvm_sev_cmd cmd = { + .id = cmd_id, + .data = (uint64_t)data, + .sev_fd = open_sev_dev_path_or_exit(), + }; + int ret; + + ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); + TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS, + "%d failed: fw error: %d\n", + cmd_id, cmd.error); + + return ret; +} + +static void test_init2(unsigned long vm_type, struct kvm_sev_init *init) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == 0, + "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d", + ret, errno); + kvm_vm_free(vm); +} + +static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "KVM_SEV_INIT2 should fail, %s.", + msg); + kvm_vm_free(vm); +} + +void test_vm_types(void) +{ + test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){}); + + /* + * TODO: check that unsupported types cannot be created. Probably + * a separate selftest. + */ + if (have_sev_es) + test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + + if (have_snp) + test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){}); + + test_init2_invalid(0, &(struct kvm_sev_init){}, + "VM type is KVM_X86_DEFAULT_VM"); + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){}, + "VM type is KVM_X86_SW_PROTECTED_VM"); +} + +void test_flags(uint32_t vm_type) +{ + int i; + + for (i = 0; i < 32; i++) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .flags = BIT(i) }, + "invalid flag"); +} + +void test_features(uint32_t vm_type, uint64_t supported_features) +{ + int i; + + for (i = 0; i < 64; i++) { + if (!(supported_features & BIT_ULL(i))) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, + "unknown feature"); + else if (KNOWN_FEATURES & BIT_ULL(i)) + test_init2(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); + } +} + +int main(int argc, char *argv[]) +{ + int kvm_fd = open_kvm_dev_path_or_exit(); + bool have_sev; + + TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES) == 0); + kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES, + &supported_vmsa_features); + + have_sev = kvm_cpu_has(X86_FEATURE_SEV); + TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)), + "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM); + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)); + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); + + TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)), + "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + + have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP); + TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)), + "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not", + kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM); + + test_vm_types(); + + test_flags(KVM_X86_SEV_VM); + if (have_sev_es) + test_flags(KVM_X86_SEV_ES_VM); + if (have_snp) + test_flags(KVM_X86_SNP_VM); + + test_features(KVM_X86_SEV_VM, 0); + if (have_sev_es) + test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + if (have_snp) + test_features(KVM_X86_SNP_VM, supported_vmsa_features); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c index c7ef97561038..0a6dfba3905b 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c @@ -10,11 +10,9 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" -#include "svm_util.h" +#include "sev.h" #include "kselftest.h" -#define SEV_POLICY_ES 0b100 - #define NR_MIGRATE_TEST_VCPUS 4 #define NR_MIGRATE_TEST_VMS 3 #define NR_LOCK_TESTING_THREADS 3 @@ -22,46 +20,24 @@ bool have_sev_es; -static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error) -{ - struct kvm_sev_cmd cmd = { - .id = cmd_id, - .data = (uint64_t)data, - .sev_fd = open_sev_dev_path_or_exit(), - }; - int ret; - - ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); - *fw_error = cmd.error; - return ret; -} - -static void sev_ioctl(int vm_fd, int cmd_id, void *data) -{ - int ret; - __u32 fw_error; - - ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error); - TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS, - "%d failed: return code: %d, errno: %d, fw error: %d", - cmd_id, ret, errno, fw_error); -} - static struct kvm_vm *sev_vm_create(bool es) { struct kvm_vm *vm; - struct kvm_sev_launch_start start = { 0 }; int i; vm = vm_create_barebones(); - sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL); + if (!es) + sev_vm_init(vm); + else + sev_es_vm_init(vm); + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) __vm_vcpu_add(vm, i); + + sev_vm_launch(vm, es ? SEV_POLICY_ES : 0); + if (es) - start.policy |= SEV_POLICY_ES; - sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start); - if (es) - sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); return vm; } @@ -91,7 +67,7 @@ static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_migrate_from(dst, src); - TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno); } static void test_sev_migrate_from(bool es) @@ -113,7 +89,7 @@ static void test_sev_migrate_from(bool es) /* Migrate the guest back to the original VM. */ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); TEST_ASSERT(ret == -1 && errno == EIO, - "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + "VM that was migrated from should be dead. ret %d, errno: %d", ret, errno); kvm_vm_free(src_vm); @@ -172,7 +148,7 @@ static void test_sev_migrate_parameters(void) vm_no_sev = aux_vm_create(true); ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Migrations require SEV enabled. ret %d, errno: %d\n", ret, + "Migrations require SEV enabled. ret %d, errno: %d", ret, errno); if (!have_sev_es) @@ -181,31 +157,31 @@ static void test_sev_migrate_parameters(void) sev_vm = sev_vm_create(/* es= */ false); sev_es_vm = sev_vm_create(/* es= */ true); sev_es_vm_no_vmsa = vm_create_barebones(); - sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); + sev_es_vm_init(sev_es_vm_no_vmsa); __vm_vcpu_add(sev_es_vm_no_vmsa, 1); ret = __sev_migrate_from(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n", + "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", + "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d", ret, errno); kvm_vm_free(sev_vm); @@ -227,16 +203,16 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_mirror_create(dst, src); - TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno); } -static void verify_mirror_allowed_cmds(int vm_fd) +static void verify_mirror_allowed_cmds(struct kvm_vm *vm) { struct kvm_sev_guest_status status; + int cmd_id; - for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) { + for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) { int ret; - __u32 fw_error; /* * These commands are allowed for mirror VMs, all others are @@ -256,14 +232,14 @@ static void verify_mirror_allowed_cmds(int vm_fd) * These commands should be disallowed before the data * parameter is examined so NULL is OK here. */ - ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error); + ret = __vm_sev_ioctl(vm, cmd_id, NULL); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able call command: %d. ret: %d, errno: %d\n", + "Should not be able call command: %d. ret: %d, errno: %d", cmd_id, ret, errno); } - sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status); + vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status); } static void test_sev_mirror(bool es) @@ -281,9 +257,9 @@ static void test_sev_mirror(bool es) __vm_vcpu_add(dst_vm, i); if (es) - sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); - verify_mirror_allowed_cmds(dst_vm->fd); + verify_mirror_allowed_cmds(dst_vm); kvm_vm_free(src_vm); kvm_vm_free(dst_vm); @@ -301,18 +277,18 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to self. ret: %d, errno: %d\n", + "Should not be able copy context to self. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + "Copy context requires SEV enabled. ret %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_with_vcpu, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d", ret, errno); if (!have_sev_es) @@ -322,13 +298,13 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); kvm_vm_free(sev_es_vm); diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c new file mode 100644 index 000000000000..86ad1c7d068f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <math.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "linux/psp-sev.h" +#include "sev.h" + + +#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) + +static void guest_snp_code(void) +{ + uint64_t sev_msr = rdmsr(MSR_AMD64_SEV); + + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED); + + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); + vmgexit(); +} + +static void guest_sev_es_code(void) +{ + /* TODO: Check CPUID after GHCB-based hypercall support is added. */ + GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED); + + /* + * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply + * force "termination" to signal "done" via the GHCB MSR protocol. + */ + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); + vmgexit(); +} + +static void guest_sev_code(void) +{ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV)); + GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + + GUEST_DONE(); +} + +/* Stash state passed via VMSA before any compiled code runs. */ +extern void guest_code_xsave(void); +asm("guest_code_xsave:\n" + "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" + "xor %edx, %edx\n" + "xsave (%rdi)\n" + "jmp guest_sev_es_code"); + +static void compare_xsave(u8 *from_host, u8 *from_guest) +{ + int i; + bool bad = false; + for (i = 0; i < 4095; i++) { + if (from_host[i] != from_guest[i]) { + printf("mismatch at %u | %02hhx %02hhx\n", + i, from_host[i], from_guest[i]); + bad = true; + } + } + + if (bad) + abort(); +} + +static void test_sync_vmsa(uint32_t type, uint64_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t gva; + void *hva; + + double x87val = M_PI; + struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; + + vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu); + gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); + hva = addr_gva2hva(vm, gva); + + vcpu_args_set(vcpu, 1, gva); + + asm("fninit\n" + "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" + "fldl %3\n" + "xsave (%2)\n" + "fstp %%st\n" + : "=m"(xsave) + : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val) + : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); + vcpu_xsave_set(vcpu, &xsave); + + vm_sev_launch(vm, policy, NULL); + + /* This page is shared, so make it decrypted. */ + memset(hva, 0, PAGE_SIZE); + + vcpu_run(vcpu); + + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + + compare_xsave((u8 *)&xsave, (u8 *)hva); + + kvm_vm_free(vm); +} + +static void test_sev(void *guest_code, uint32_t type, uint64_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); + + /* TODO: Validate the measurement is as expected. */ + vm_sev_launch(vm, policy, NULL); + + for (;;) { + vcpu_run(vcpu); + + if (is_sev_es_vm(vm)) { + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + break; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + continue; + case UCALL_DONE: + return; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected exit: %s", + exit_reason_str(vcpu->run->exit_reason)); + } + } + + kvm_vm_free(vm); +} + +static void guest_shutdown_code(void) +{ + struct desc_ptr idt; + + /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */ + memset(&idt, 0, sizeof(idt)); + set_idt(&idt); + + __asm__ __volatile__("ud2"); +} + +static void test_sev_shutdown(uint32_t type, uint64_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); + + vm_sev_launch(vm, policy, NULL); + + vcpu_run(vcpu); + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, + "Wanted SHUTDOWN, got %s", + exit_reason_str(vcpu->run->exit_reason)); + + kvm_vm_free(vm); +} + +static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy) +{ + const u64 xf_mask = XFEATURE_MASK_X87_AVX; + + if (type == KVM_X86_SNP_VM) + test_sev(guest, type, policy | SNP_POLICY_DBG); + else + test_sev(guest, type, policy | SEV_POLICY_NO_DBG); + test_sev(guest, type, policy); + + if (type == KVM_X86_SEV_VM) + return; + + test_sev_shutdown(type, policy); + + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { + test_sync_vmsa(type, policy); + if (type == KVM_X86_SNP_VM) + test_sync_vmsa(type, policy | SNP_POLICY_DBG); + else + test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG); + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); + + test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0); + + if (kvm_cpu_has(X86_FEATURE_SEV_ES)) + test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES); + + if (kvm_cpu_has(X86_FEATURE_SEV_SNP)) + test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy()); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index 06edf00a97d6..fabeeaddfb3a 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c @@ -5,9 +5,6 @@ * Test that KVM emulates instructions in response to EPT violations when * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "flds_emulation.h" #include "test_util.h" @@ -60,10 +57,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - - vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); + vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR); rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); @@ -74,7 +68,7 @@ int main(int argc, char *argv[]) MEM_REGION_SIZE / PAGE_SIZE, 0); gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc"); virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); @@ -102,7 +96,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; default: - TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + TEST_FAIL("Unrecognized ucall: %lu", uc.cmd); } kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index e18b86666e1f..55c88d664a94 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -4,7 +4,6 @@ * * Tests for SMM. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 4c4925a8ab45..f2c7a1c297e3 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -6,7 +6,6 @@ * * Tests for vCPU state save/restore, including nested guest state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -139,6 +138,78 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) static void __attribute__((__flatten__)) guest_code(void *arg) { GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[PAGE_SIZE]; + + memset(buffer, 0xcc, sizeof(buffer)); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. + */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + GUEST_SYNC(2); if (arg) { @@ -153,10 +224,11 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { + uint64_t *xstate_bv, saved_xstate_bv; vm_vaddr_t nested_gva = 0; - + struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vcpuN; struct kvm_vm *vm; struct kvm_x86_state *state; struct ucall uc; @@ -209,6 +281,34 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. + */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 32bef39bec21..917b6066cfc1 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm) x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); GUEST_ASSERT(vintr_irq_called); GUEST_ASSERT(intr_irq_called); @@ -93,9 +90,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c index d6fcdcc3af31..00135cbba35e 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c @@ -48,12 +48,9 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 4e2479716da6..7b6481d6c0d3 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -8,7 +8,6 @@ * Copyright (C) 2021, Red Hat, Inc. * */ - #include <stdatomic.h> #include <stdio.h> #include <unistd.h> @@ -34,13 +33,12 @@ static void l2_guest_code_int(void); static void guest_int_handler(struct ex_regs *regs) { int_fired++; - GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int, - regs->rip, (unsigned long)l2_guest_code_int); + GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int); } static void l2_guest_code_int(void) { - GUEST_ASSERT_1(int_fired == 1, int_fired); + GUEST_ASSERT_EQ(int_fired, 1); /* * Same as the vmmcall() function, but with a ud2 sneaked after the @@ -53,7 +51,7 @@ static void l2_guest_code_int(void) : "rbx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); - GUEST_ASSERT_1(bp_fired == 1, bp_fired); + GUEST_ASSERT_EQ(bp_fired, 1); hlt(); } @@ -66,9 +64,9 @@ static void guest_nmi_handler(struct ex_regs *regs) if (nmi_stage_get() == 1) { vmmcall(); - GUEST_ASSERT(false); + GUEST_FAIL("Unexpected resume after VMMCALL"); } else { - GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get()); + GUEST_ASSERT_EQ(nmi_stage_get(), 3); GUEST_DONE(); } } @@ -104,7 +102,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i } run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -112,7 +111,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i clgi(); x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI); - GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get()); + GUEST_ASSERT_EQ(nmi_stage_get(), 1); nmi_stage_inc(); stgi(); @@ -133,7 +132,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i vmcb->control.next_rip = vmcb->save.rip + 2; run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT, + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, + "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -152,9 +152,6 @@ static void run_test(bool is_nmi) vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler); vm_install_exception_handler(vm, INT_NR, guest_int_handler); @@ -166,7 +163,7 @@ static void run_test(bool is_nmi) idt_alt_vm = vm_vaddr_alloc_page(vm); idt_alt = addr_gva2hva(vm, idt_alt_vm); - idt = addr_gva2hva(vm, vm->idt); + idt = addr_gva2hva(vm, vm->arch.idt); memcpy(idt_alt, idt, getpagesize()); } else { idt_alt_vm = 0; @@ -185,7 +182,7 @@ static void run_test(bool is_nmi) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx"); + REPORT_GUEST_ASSERT(uc); break; /* NOT REACHED */ case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c index 8a62cca28cfb..8a62cca28cfb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c index 2da89fdc2471..8fa3948b0170 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c @@ -8,14 +8,14 @@ * including requesting an invalid register set, updates to/from values * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <pthread.h> +#include "kvm_test_harness.h" #include "test_util.h" #include "kvm_util.h" #include "processor.h" @@ -40,12 +40,14 @@ void guest_code(void) : "rax", "rbx"); } +KVM_ONE_VCPU_TEST_SUITE(sync_regs_test); + static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) { #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) REG_COMPARE(rax); REG_COMPARE(rbx); @@ -80,58 +82,182 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, #define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) #define INVALID_SYNC_FIELD 0x80000000 -int main(int argc, char *argv[]) +/* + * Set an exception as pending *and* injected while KVM is processing events. + * KVM is supposed to ignore/drop pending exceptions if userspace is also + * requesting that an exception be injected. + */ +static void *race_events_inj_pen(void *arg) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_run *run; - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_vcpu_events events; - int rv, cap; + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); - TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); + WRITE_ONCE(events->exception.nr, UD_VECTOR); + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.injected, 1); + WRITE_ONCE(events->exception.pending, 1); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Set an invalid exception vector while KVM is processing events. KVM is + * supposed to reject any vector >= 32, as well as NMIs (vector 2). + */ +static void *race_events_exc(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.nr, UD_VECTOR); + WRITE_ONCE(events->exception.pending, 1); + WRITE_ONCE(events->exception.nr, 255); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is + * illegal, and KVM's MMU heavily relies on vCPU state being valid. + */ +static noinline void *race_sregs_cr4(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + __u64 *cr4 = &run->s.regs.sregs.cr4; + __u64 pae_enabled = *cr4; + __u64 pae_disabled = *cr4 & ~X86_CR4_PAE; - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS); + WRITE_ONCE(*cr4, pae_enabled); + asm volatile(".rept 512\n\t" + "nop\n\t" + ".endr"); + WRITE_ONCE(*cr4, pae_disabled); + + pthread_testcancel(); + } + + return NULL; +} + +static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer) +{ + const time_t TIMEOUT = 2; /* seconds, roughly */ + struct kvm_x86_state *state; + struct kvm_translation tr; + struct kvm_run *run; + pthread_t thread; + time_t t; run = vcpu->run; + run->kvm_valid_regs = KVM_SYNC_X86_SREGS; + vcpu_run(vcpu); + run->kvm_valid_regs = 0; + + /* Save state *before* spawning the thread that mucks with vCPU state. */ + state = vcpu_save_state(vcpu); + + /* + * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE + * should already be set in guest state. + */ + TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) && + (run->s.regs.sregs.efer & EFER_LME), + "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d", + !!(run->s.regs.sregs.cr4 & X86_CR4_PAE), + !!(run->s.regs.sregs.efer & EFER_LME)); + + TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0); + + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + /* + * Reload known good state if the vCPU triple faults, e.g. due + * to the unhandled #GPs being injected. VMX preserves state + * on shutdown, but SVM synthesizes an INIT as the VMCB state + * is architecturally undefined on triple fault. + */ + if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN) + vcpu_load_state(vcpu, state); + + if (racer == race_sregs_cr4) { + tr = (struct kvm_translation) { .linear_address = 0 }; + __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr); + } + } + + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_x86_state_cleanup(state); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code) +{ + struct kvm_run *run = vcpu->run; + int rv; + /* Request reading invalid register set from VCPU. */ run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; +} + +KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code) +{ + struct kvm_run *run = vcpu->run; + int rv; /* Request setting invalid register set into VCPU. */ run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; +} + +KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_vcpu_events events; + struct kvm_sregs sregs; + struct kvm_regs regs; /* Request and verify all valid register sets. */ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ run->kvm_valid_regs = TEST_SYNC_FIELDS; - rv = _vcpu_run(vcpu); + vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); vcpu_regs_get(vcpu, ®s); @@ -142,6 +268,19 @@ int main(int argc, char *argv[]) vcpu_events_get(vcpu, &events); compare_vcpu_events(&events, &run->s.regs.events); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_vcpu_events events; + struct kvm_sregs sregs; + struct kvm_regs regs; + + /* Run once to get register set */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); /* Set and verify various register values. */ run->s.regs.regs.rbx = 0xBAD1DEA; @@ -150,7 +289,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; - rv = _vcpu_run(vcpu); + vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1, "rbx sync regs value incorrect 0x%llx.", @@ -167,6 +306,11 @@ int main(int argc, char *argv[]) vcpu_events_get(vcpu, &events); compare_vcpu_events(&events, &run->s.regs.events); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code) +{ + struct kvm_run *run = vcpu->run; /* Clear kvm_dirty_regs bits, verify new s.regs values are * overwritten with existing guest values. @@ -174,11 +318,22 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.regs.rbx = 0xDEADBEEF; - rv = _vcpu_run(vcpu); + vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF, "rbx sync regs value incorrect 0x%llx.", run->s.regs.regs.rbx); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + + /* Run once to get register set */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); /* Clear kvm_valid_regs bits and kvm_dirty_bits. * Verify s.regs values are not overwritten with existing guest values @@ -187,9 +342,10 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = 0; run->kvm_dirty_regs = 0; run->s.regs.regs.rbx = 0xAAAA; + vcpu_regs_get(vcpu, ®s); regs.rbx = 0xBAC0; vcpu_regs_set(vcpu, ®s); - rv = _vcpu_run(vcpu); + vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA, "rbx sync regs value incorrect 0x%llx.", @@ -198,6 +354,17 @@ int main(int argc, char *argv[]) TEST_ASSERT(regs.rbx == 0xBAC0 + 1, "rbx guest value incorrect 0x%llx.", regs.rbx); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code) +{ + struct kvm_run *run = vcpu->run; + struct kvm_regs regs; + + /* Run once to get register set */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten * with existing guest values but that guest values are overwritten @@ -206,7 +373,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = 0; run->kvm_dirty_regs = TEST_SYNC_FIELDS; run->s.regs.regs.rbx = 0xBBBB; - rv = _vcpu_run(vcpu); + vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB, "rbx sync regs value incorrect 0x%llx.", @@ -215,8 +382,30 @@ int main(int argc, char *argv[]) TEST_ASSERT(regs.rbx == 0xBBBB + 1, "rbx guest value incorrect 0x%llx.", regs.rbx); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code) +{ + race_sync_regs(vcpu, race_sregs_cr4); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code) +{ + race_sync_regs(vcpu, race_events_exc); +} + +KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code) +{ + race_sync_regs(vcpu, race_events_inj_pen); +} - kvm_vm_free(vm); +int main(int argc, char *argv[]) +{ + int cap; + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); + TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); - return 0; + return test_harness_run(argc, argv); } diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c index 56306a19144a..56306a19144a 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c index c9f67702f657..12b0964f4f13 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c @@ -84,7 +84,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) ksft_test_result_pass("stage %d passed\n", stage + 1); return; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); @@ -103,39 +103,39 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code); val = 0; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ run_vcpu(vcpu, 1); val = 1ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ run_vcpu(vcpu, 2); val = 2ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Host: writes to MSR_IA32_TSC set the host-side offset * and therefore do not change MSR_IA32_TSC_ADJUST. */ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); run_vcpu(vcpu, 3); /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); /* Restore previous value. */ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the @@ -143,8 +143,8 @@ int main(void) */ run_vcpu(vcpu, 4); val = 3ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side @@ -152,8 +152,8 @@ int main(void) */ run_vcpu(vcpu, 5); val = 4ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c index 5b669818e39a..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 85f34ca7e49e..1e5e564523b3 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -17,14 +17,11 @@ * delivered into the guest or not. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <pthread.h> #include <inttypes.h> #include <string.h> #include <time.h> -#include "kvm_util_base.h" #include "kvm_util.h" #include "mce.h" #include "processor.h" @@ -89,7 +86,7 @@ static void ucna_injection_guest_code(void) wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); /* Enables interrupt in guest. */ - asm volatile("sti"); + sti(); /* Let user space inject the first UCNA */ GUEST_SYNC(SYNC_FIRST_UCNA); @@ -143,7 +140,7 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); printf("vCPU received GP in guest.\n"); } @@ -188,7 +185,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); @@ -198,7 +195,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); @@ -208,7 +205,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { - TEST_ASSERT(false, "vCPU assertion failure: %s.\n", + TEST_ASSERT(false, "vCPU assertion failure: %s.", (const char *)uc.args[0]); } @@ -271,7 +268,7 @@ int main(int argc, char *argv[]) kvm_check_cap(KVM_CAP_MCE); - vm = __vm_create(VM_MODE_DEFAULT, 3, 0); + vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0); kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, &supported_mcg_caps); @@ -285,10 +282,6 @@ int main(int argc, char *argv[]) cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code); cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(ucna_vcpu); - vcpu_init_descriptor_tables(cmcidis_vcpu); - vcpu_init_descriptor_tables(cmci_vcpu); vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler); vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 0cb51fa42773..be7d72f3c029 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -20,8 +20,8 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count) end = (unsigned long)buffer + 8192; asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); - GUEST_ASSERT_1(count == 0, count); - GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end); + GUEST_ASSERT_EQ(count, 0); + GUEST_ASSERT_EQ((unsigned long)buffer, end); } static void guest_code(void) @@ -43,7 +43,9 @@ static void guest_code(void) memset(buffer, 0, sizeof(buffer)); guest_ins_port80(buffer, 8192); for (i = 0; i < 8192; i++) - GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]); + __GUEST_ASSERT(buffer[i] == 0xaa, + "Expected '0xaa', got '0x%x' at buffer[%u]", + buffer[i], i); GUEST_DONE(); } @@ -69,7 +71,7 @@ int main(int argc, char *argv[]) break; TEST_ASSERT(run->io.port == 0x80, - "Expected I/O at port 0x80, got port 0x%x\n", run->io.port); + "Expected I/O at port 0x80, got port 0x%x", run->io.port); /* * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. @@ -83,7 +85,7 @@ int main(int argc, char *argv[]) regs.rcx = 1; if (regs.rcx == 3) regs.rcx = 8192; - memset((void *)run + run->io.data_offset, 0xaa, 4096); + memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE); vcpu_regs_set(vcpu, ®s); } @@ -91,7 +93,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 3533dc2fbfee..8463a9956410 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -4,19 +4,13 @@ * * Tests for exiting into userspace on registered MSRs */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> +#include "kvm_test_harness.h" #include "test_util.h" #include "kvm_util.h" #include "vmx.h" -/* Forced emulation prefix, used to invoke the emulator unconditionally. */ -#define KVM_FEP "ud2; .byte 'k', 'v', 'm';" -#define KVM_FEP_LENGTH 5 -static int fep_available = 1; - #define MSR_NON_EXISTENT 0x474f4f00 static u64 deny_bits = 0; @@ -260,14 +254,7 @@ static void guest_code_filter_allow(void) GUEST_ASSERT(data == 2); GUEST_ASSERT(guest_exception_count == 0); - /* - * Test to see if the instruction emulator is available (ie: the module - * parameter 'kvm.force_emulation_prefix=1' is set). This instruction - * will #UD if it isn't available. - */ - __asm__ __volatile__(KVM_FEP "nop"); - - if (fep_available) { + if (is_forced_emulation_enabled) { /* Let userspace know we aren't done. */ GUEST_SYNC(0); @@ -356,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -388,12 +381,6 @@ static void guest_fep_gp_handler(struct ex_regs *regs) &em_wrmsr_start, &em_wrmsr_end); } -static void guest_ud_handler(struct ex_regs *regs) -{ - fep_available = 0; - regs->rip += KVM_FEP_LENGTH; -} - static void check_for_guest_assert(struct kvm_vcpu *vcpu) { struct ucall uc; @@ -527,14 +514,14 @@ static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu) process_ucall_done(vcpu); } -static void test_msr_filter_allow(void) +KVM_ONE_VCPU_TEST_SUITE(user_msr); + +KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; + struct kvm_vm *vm = vcpu->vm; + uint64_t cmd; int rc; - vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_allow); - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); @@ -544,9 +531,6 @@ static void test_msr_filter_allow(void) vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. */ @@ -561,11 +545,11 @@ static void test_msr_filter_allow(void) run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); vcpu_run(vcpu); - vm_install_exception_handler(vm, UD_VECTOR, NULL); + cmd = process_ucall(vcpu); - if (process_ucall(vcpu) != UCALL_DONE) { + if (is_forced_emulation_enabled) { + TEST_ASSERT_EQ(cmd, UCALL_SYNC); vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); /* Process emulated rdmsr and wrmsr instructions. */ @@ -583,10 +567,9 @@ static void test_msr_filter_allow(void) /* Confirm the guest completed without issues. */ run_guest_then_process_ucall_done(vcpu); } else { + TEST_ASSERT_EQ(cmd, UCALL_DONE); printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n"); } - - kvm_vm_free(vm); } static int handle_ucall(struct kvm_vcpu *vcpu) @@ -646,16 +629,12 @@ static void handle_wrmsr(struct kvm_run *run) } } -static void test_msr_filter_deny(void) +KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_run *run; + struct kvm_vm *vm = vcpu->vm; + struct kvm_run *run = vcpu->run; int rc; - vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_deny); - run = vcpu->run; - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL | @@ -689,18 +668,13 @@ static void test_msr_filter_deny(void) done: TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space"); TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space"); - - kvm_vm_free(vm); } -static void test_msr_permission_bitmap(void) +KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; + struct kvm_vm *vm = vcpu->vm; int rc; - vm = vm_create_with_one_vcpu(&vcpu, guest_code_permission_bitmap); - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); @@ -714,9 +688,9 @@ static void test_msr_permission_bitmap(void) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); - run_guest_then_process_ucall_done(vcpu); - kvm_vm_free(vm); + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); + run_guest_then_process_ucall_done(vcpu); } #define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \ @@ -786,31 +760,18 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm) } /* Test that attempts to write to the unused bits in a flag fails. */ -static void test_user_exit_msr_flags(void) +KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - vm = vm_create_with_one_vcpu(&vcpu, NULL); + struct kvm_vm *vm = vcpu->vm; /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */ run_user_space_msr_flag_test(vm); /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */ run_msr_filter_flag_test(vm); - - kvm_vm_free(vm); } int main(int argc, char *argv[]) { - test_msr_filter_allow(); - - test_msr_filter_deny(); - - test_msr_permission_bitmap(); - - test_user_exit_msr_flags(); - - return 0; + return test_harness_run(argc, argv); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index 2bed5fb3a0d6..a81a24761aac 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u", run->internal.suberror); break; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index e4ad5fef52ff..98cb6bdab3e6 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Red Hat, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <linux/bitmap.h> @@ -28,16 +25,16 @@ #define NESTED_TEST_MEM1 0xc0001000 #define NESTED_TEST_MEM2 0xc0002000 -static void l2_guest_code(void) +static void l2_guest_code(u64 *a, u64 *b) { - *(volatile uint64_t *)NESTED_TEST_MEM1; - *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + READ_ONCE(*a); + WRITE_ONCE(*a, 1); GUEST_SYNC(true); GUEST_SYNC(false); - *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + WRITE_ONCE(*b, 1); GUEST_SYNC(true); - *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + WRITE_ONCE(*b, 1); GUEST_SYNC(true); GUEST_SYNC(false); @@ -45,17 +42,33 @@ static void l2_guest_code(void) vmcall(); } +static void l2_guest_code_ept_enabled(void) +{ + l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); +} + +static void l2_guest_code_ept_disabled(void) +{ + /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ + l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); +} + void l1_guest_code(struct vmx_pages *vmx) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); GUEST_ASSERT(load_vmcs(vmx)); - prepare_vmcs(vmx, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); + if (vmx->eptp_gpa) + l2_rip = l2_guest_code_ept_enabled; + else + l2_rip = l2_guest_code_ept_disabled; + + prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); GUEST_SYNC(false); GUEST_ASSERT(!vmlaunch()); @@ -64,7 +77,7 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } -int main(int argc, char *argv[]) +static void test_vmx_dirty_log(bool enable_ept) { vm_vaddr_t vmx_pages_gva = 0; struct vmx_pages *vmx; @@ -76,8 +89,7 @@ int main(int argc, char *argv[]) struct ucall uc; bool done = false; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_cpu_has_ept()); + pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); @@ -103,17 +115,22 @@ int main(int argc, char *argv[]) * * Note that prepare_eptp should be called only L1's GPA map is done, * meaning after the last call to virt_map. + * + * When EPT is disabled, the L2 guest code will still access the same L1 + * GPAs as the EPT enabled case. */ - prepare_eptp(vmx, vm, 0); - nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + if (enable_ept) { + prepare_eptp(vmx, vm); + nested_map_memslot(vmx, vm, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); + } bmap = bitmap_zalloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); + memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -128,17 +145,17 @@ int main(int argc, char *argv[]) */ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); } - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; @@ -148,3 +165,15 @@ int main(int argc, char *argv[]) } } } + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + test_vmx_dirty_log(/*enable_ept=*/false); + + if (kvm_cpu_has_ept()) + test_vmx_dirty_log(/*enable_ept=*/true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index be0bdb8c6f78..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -28,7 +28,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Expected emulation failure, got %d\n", + "Expected emulation failure, got %d", run->emulation_failure.suberror); } @@ -50,7 +50,7 @@ static void set_timer(void) timer.it_value.tv_sec = 0; timer.it_value.tv_usec = 200; timer.it_interval = timer.it_value; - ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); + TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); } static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set) @@ -110,14 +110,11 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); /* diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c index a100ee5f0009..a100ee5f0009 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index 90720b6205f4..90720b6205f4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c new file mode 100644 index 000000000000..cf1d2d1f2a8f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * Test KVM's ability to save and restore nested state when the L1 guest + * is using 5-level paging and the L2 guest is using 4-level paging. + * + * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86: + * model canonical checks more precisely"). + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define LA57_GS_BASE 0xff2bc0311fb00000ull + +static void l2_guest_code(void) +{ + /* + * Sync with L0 to trigger save/restore. After + * resuming, execute VMCALL to exit back to L1. + */ + GUEST_SYNC(1); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u64 guest_cr4; + vm_paddr_t pml5_pa, pml4_pa; + u64 *pml5; + u64 exit_reason; + + /* Set GS_BASE to a value that is only canonical with LA57. */ + wrmsr(MSR_GS_BASE, LA57_GS_BASE); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Set up L2 with a 4-level page table by pointing its CR3 to + * L1's first PML4 table and clearing CR4.LA57. This creates + * the CR4.LA57 mismatch that exercises the bug. + */ + pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK; + pml5 = (u64 *)pml5_pa; + pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK; + vmwrite(GUEST_CR3, pml4_pa); + + guest_cr4 = vmreadz(GUEST_CR4); + guest_cr4 &= ~X86_CR4_LA57; + vmwrite(GUEST_CR4, guest_cr4); + + GUEST_ASSERT(!vmlaunch()); + + exit_reason = vmreadz(VM_EXIT_REASON); + GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + l1_guest_code(vmx_pages); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* + * L1 needs to read its own PML5 table to set up L2. Identity map + * the PML5 table to facilitate this. + */ + virt_map(vm, vm->pgd, vm->pgd, 1); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(uc.args[1] == stage, + "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]); + if (stage == 1) { + pr_info("L2 is active; performing save/restore.\n"); + state = vcpu_save_state(vcpu); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index 4c90f76930f9..7ff6f62e20a3 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -10,16 +10,15 @@ * and check it can be retrieved with KVM_GET_MSR, also test * the invalid LBR formats are rejected. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include <linux/bitmap.h> +#include "kvm_test_harness.h" #include "kvm_util.h" #include "vmx.h" -union perf_capabilities { +static union perf_capabilities { struct { u64 lbr_format:6; u64 pebs_trap:1; @@ -30,10 +29,10 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; - u64 anythread_deprecated:1; + u64 pebs_timing_info:1; }; u64 capabilities; -}; +} host_cap; /* * The LBR format and most PEBS features are immutable, all other features are @@ -45,6 +44,7 @@ static const union perf_capabilities immutable_caps = { .pebs_arch_reg = 1, .pebs_format = -1, .pebs_baseline = 1, + .pebs_timing_info = 1, }; static const union perf_capabilities format_caps = { @@ -52,42 +52,40 @@ static const union perf_capabilities format_caps = { .pebs_format = -1, }; +static void guest_test_perf_capabilities_gp(uint64_t val) +{ + uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); + + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for value '0x%lx', got %s", + val, ex_str(vector)); +} + static void guest_code(uint64_t current_val) { - uint8_t vector; int i; - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val); - GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector); + guest_test_perf_capabilities_gp(current_val); + guest_test_perf_capabilities_gp(0); - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0); - GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector); - - for (i = 0; i < 64; i++) { - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, - current_val ^ BIT_ULL(i)); - GUEST_ASSERT_2(vector == GP_VECTOR, - current_val ^ BIT_ULL(i), vector); - } + for (i = 0; i < 64; i++) + guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i)); GUEST_DONE(); } +KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps); + /* * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value * written, that the guest always sees the userspace controlled value, and that * PERF_CAPABILITIES is immutable after KVM_RUN. */ -static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) +KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code); struct ucall uc; int r, i; - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); vcpu_args_set(vcpu, 1, host_cap.capabilities); @@ -95,7 +93,7 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: break; @@ -103,7 +101,8 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) TEST_FAIL("Unexpected ucall: %lu", uc.cmd); } - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), + host_cap.capabilities); vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); @@ -116,31 +115,21 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail", host_cap.capabilities ^ BIT_ULL(i)); } - - kvm_vm_free(vm); } /* * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features * enabled, as well as '0' (to disable all features). */ -static void test_basic_perf_capabilities(union perf_capabilities host_cap) +KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - - kvm_vm_free(vm); } -static void test_fungible_perf_capabilities(union perf_capabilities host_cap) +KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) { const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL); int bit; for_each_set_bit(bit, &fungible_caps, 64) { @@ -149,8 +138,6 @@ static void test_fungible_perf_capabilities(union perf_capabilities host_cap) host_cap.capabilities & ~BIT_ULL(bit)); } vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - - kvm_vm_free(vm); } /* @@ -159,14 +146,11 @@ static void test_fungible_perf_capabilities(union perf_capabilities host_cap) * separately as they are multi-bit values, e.g. toggling or setting a single * bit can generate a false positive without dedicated safeguards. */ -static void test_immutable_perf_capabilities(union perf_capabilities host_cap) +KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code) { const uint64_t reserved_caps = (~host_cap.capabilities | immutable_caps.capabilities) & ~format_caps.capabilities; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL); union perf_capabilities val = host_cap; int r, bit; @@ -200,8 +184,6 @@ static void test_immutable_perf_capabilities(union perf_capabilities host_cap) TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x", val.pebs_format, host_cap.pebs_format); } - - kvm_vm_free(vm); } /* @@ -210,17 +192,13 @@ static void test_immutable_perf_capabilities(union perf_capabilities host_cap) * LBR_TOS as those bits are writable across all uarch implementations (arch * LBRs will need to poke a different MSR). */ -static void test_lbr_perf_capabilities(union perf_capabilities host_cap) +KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) { - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; int r; if (!host_cap.lbr_format) return; - vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); @@ -228,15 +206,34 @@ static void test_lbr_perf_capabilities(union perf_capabilities host_cap) r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); +} + +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); - kvm_vm_free(vm); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } } int main(int argc, char *argv[]) { - union perf_capabilities host_cap; - - TEST_REQUIRE(get_kvm_param_bool("enable_pmu")); + TEST_REQUIRE(kvm_is_pmu_enabled()); TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); @@ -247,9 +244,5 @@ int main(int argc, char *argv[]) TEST_ASSERT(host_cap.full_width_write, "Full-width writes should always be supported"); - test_basic_perf_capabilities(host_cap); - test_fungible_perf_capabilities(host_cap); - test_immutable_perf_capabilities(host_cap); - test_guest_wrmsr_perf_capabilities(host_cap); - test_lbr_perf_capabilities(host_cap); + return test_harness_run(argc, argv); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c index affc32800158..00dd2ac07a61 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c @@ -9,7 +9,6 @@ * value instead of partially decayed timer value * */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c index 41ea7028a1f8..67a62a5a8895 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c @@ -125,21 +125,25 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) /* * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without - * setting the nested state but flags other than eVMCS must be clear. - * The eVMCS flag can be set if the enlightened VMCS capability has - * been enabled. + * setting the nested state. When the eVMCS flag is not set, the + * expected return value is '0'. */ set_default_vmx_state(state, state_sz); + state->flags = 0; state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; - test_nested_state_expect_einval(vcpu, state); + test_nested_state(vcpu, state); - state->flags &= KVM_STATE_NESTED_EVMCS; + /* + * When eVMCS is supported, the eVMCS flag can only be set if the + * enlightened VMCS capability has been enabled. + */ if (have_evmcs) { + state->flags = KVM_STATE_NESTED_EVMCS; test_nested_state_expect_einval(vcpu, state); vcpu_enable_evmcs(vcpu); + test_nested_state(vcpu, state); } - test_nested_state(vcpu, state); /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ state->hdr.vmx.smm.flags = 1; diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index 67ac2a3292ef..ae4a4b6c05ca 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -19,8 +19,6 @@ * Migration is a command line option. When used on non-numa machines will * exit with error. Test is still usefull on non-numa for testing IPIs. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <getopt.h> #include <pthread.h> #include <inttypes.h> @@ -108,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data) data->halter_tpr = xapic_read_reg(APIC_TASKPRI); data->halter_ppr = xapic_read_reg(APIC_PROCPRI); data->hlt_count++; - asm volatile("sti; hlt; cli"); + safe_halt(); + cli(); data->wake_count++; } } @@ -216,7 +215,7 @@ static void *vcpu_thread(void *arg) "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" "Halter TPR=%#x PPR=%#x LVR=%#x\n" "Migrations attempted: %lu\n" - "Migrations completed: %lu\n", + "Migrations completed: %lu", vcpu->id, (const char *)uc.args[0], params->data->ipis_sent, params->data->hlt_count, params->data->wake_count, @@ -257,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, int nodes = 0; time_t start_time, last_update, now; time_t interval_secs = 1; - int i, r; + int i; int from, to; unsigned long bit; uint64_t hlt_count; @@ -268,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, delay_usecs); /* Get set of first 64 numa nodes available */ - r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 0, MPOL_F_MEMS_ALLOWED); - TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " "(each 1-bit indicates node is present): %#lx\n", @@ -288,7 +286,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, } TEST_ASSERT(nodes > 1, - "Did not find at least 2 numa nodes. Can't do migration\n"); + "Did not find at least 2 numa nodes. Can't do migration"); fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); @@ -347,7 +345,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, wake_count != data->wake_count, "IPI, HLT and wake count have not increased " "in the last %lu seconds. " - "HLTer is likely hung.\n", interval_secs); + "HLTer is likely hung.", interval_secs); ipis_sent = data->ipis_sent; hlt_count = data->hlt_count; @@ -381,7 +379,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs, "-m adds calls to migrate_pages while vCPUs are running." " Default is no migrations.\n" "-d <delay microseconds> - delay between migrate_pages() calls." - " Default is %d microseconds.\n", + " Default is %d microseconds.", DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); } } @@ -410,8 +408,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(params[0].vcpu); vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); @@ -469,6 +465,19 @@ int main(int argc, char *argv[]) cancel_join_vcpu_thread(threads[0], params[0].vcpu); cancel_join_vcpu_thread(threads[1], params[1].vcpu); + /* + * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT, + * then the number of HLT exits may be less than the number of HLTs + * that were executed, as Idle HLT elides the exit if the vCPU has an + * unmasked, pending IRQ (or NMI). + */ + if (this_cpu_has(X86_FEATURE_IDLE_HLT)) + TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), + "HLT insns = %lu, HLT exits = %lu", + data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + else + TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + fprintf(stderr, "Test successful after running for %d seconds.\n" "Sending vCPU sent %lu IPIs to halting vCPU\n" diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 396c13f42457..3b4814c55722 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -14,11 +13,12 @@ struct xapic_vcpu { struct kvm_vcpu *vcpu; bool is_x2apic; + bool has_xavic_errata; }; static void xapic_guest_code(void) { - asm volatile("cli"); + cli(); xapic_enable(); @@ -32,9 +32,13 @@ static void xapic_guest_code(void) } } +#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ + GENMASK_ULL(17, 16) | \ + GENMASK_ULL(13, 13)) + static void x2apic_guest_code(void) { - asm volatile("cli"); + cli(); x2apic_enable(); @@ -42,7 +46,12 @@ static void x2apic_guest_code(void) uint64_t val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; - x2apic_write_reg(APIC_ICR, val); + if (val & X2APIC_RSVD_BITS_MASK) { + x2apic_write_reg_fault(APIC_ICR, val); + } else { + x2apic_write_reg(APIC_ICR, val); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); + } GUEST_SYNC(val); } while (1); } @@ -65,34 +74,35 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); - ASSERT_EQ(uc.args[1], val); + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], val); vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) { - val &= (-1u | (0xffull << (32 + 24))); - ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); - } else { - ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + if (!x->has_xavic_errata) + val &= (-1u | (0xffull << (32 + 24))); + } else if (val & X2APIC_RSVD_BITS_MASK) { + return; } -} -#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ - GENMASK_ULL(17,16) | \ - GENMASK_ULL(13,13)) + if (x->has_xavic_errata) + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + else + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); +} static void __test_icr(struct xapic_vcpu *x, uint64_t val) { - if (x->is_x2apic) { - /* Hardware writing vICR register requires reserved bits 31:20, - * 17:16 and 13 kept as zero to avoid #GP exception. Data value - * written to vICR should mask out those bits above. - */ - val &= ~X2APIC_RSVED_BITS_MASK; - } - ____test_icr(x, val | APIC_ICR_BUSY); + /* + * The BUSY bit is reserved on both AMD and Intel, but only AMD treats + * it is as _must_ be zero. Intel simply ignores the bit. Don't test + * the BUSY bit for x2APIC, as there is no single correct behavior. + */ + if (!x->is_x2apic) + ____test_icr(x, val | APIC_ICR_BUSY); + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); } @@ -110,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, icr | i); /* - * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of - * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use + * vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; for (i = 0; i < 0xff; i++) { @@ -185,6 +195,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. + */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -205,6 +242,15 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); x.is_x2apic = false; + /* + * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), + * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel + * drops writes, AMD does not). Account for the errata when checking + * that KVM reads back what was written. + */ + x.has_xavic_errata = host_cpu_is_amd && + get_kvm_amd_param_bool("avic"); + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); @@ -212,4 +258,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index 905bd5ae4431..d038c1571729 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -4,7 +4,6 @@ * * Copyright (C) 2022, Google LLC. */ - #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -20,13 +19,14 @@ * Assert that architectural dependency rules are satisfied, e.g. that AVX is * supported if and only if SSE is supported. */ -#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ -do { \ - uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ - \ - GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) || \ - __supported == ((xfeatures) | (dependencies)), \ - __supported, (xfeatures), (dependencies)); \ +#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ +do { \ + uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ + \ + __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ + __supported == ((xfeatures) | (dependencies)), \ + "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \ + __supported, (xfeatures), (dependencies)); \ } while (0) /* @@ -41,22 +41,23 @@ do { \ do { \ uint64_t __supported = (supported_xcr0) & (xfeatures); \ \ - GUEST_ASSERT_2(!__supported || __supported == (xfeatures), \ + __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ + "supported = 0x%lx, xfeatures = 0x%llx", \ __supported, (xfeatures)); \ } while (0) static void guest_code(void) { - uint64_t xcr0_reset; + uint64_t initial_xcr0; uint64_t supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); - xcr0_reset = xgetbv(0); + initial_xcr0 = xgetbv(0); supported_xcr0 = this_cpu_supported_xcr0(); - GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP); + GUEST_ASSERT(initial_xcr0 == supported_xcr0); /* Check AVX */ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, @@ -78,15 +79,24 @@ static void guest_code(void) ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, XFEATURE_MASK_XTILE); + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got %s", + ex_str(vector)); + vector = xsetbv_safe(0, supported_xcr0); - GUEST_ASSERT_2(!vector, supported_xcr0, vector); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(0x%lx), got %s", + supported_xcr0, ex_str(vector)); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) continue; vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); - GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s", + BIT_ULL(i), supported_xcr0, ex_str(vector)); } GUEST_DONE(); @@ -104,20 +114,17 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - while (1) { vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 05898ad9f4d9..23909b501ac2 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" @@ -66,6 +62,7 @@ enum { TEST_POLL_TIMEOUT, TEST_POLL_MASKED, TEST_POLL_WAKE, + SET_VCPU_INFO, TEST_TIMER_PAST, TEST_LOCKING_SEND_RACE, TEST_LOCKING_POLL_RACE, @@ -128,7 +125,7 @@ struct compat_vcpu_runstate_info { uint32_t state; uint64_t state_entry_time; uint64_t time[5]; -} __attribute__((__packed__));; +} __attribute__((__packed__)); struct arch_vcpu_info { unsigned long cr2; @@ -174,8 +171,9 @@ static volatile bool guest_saw_irq; static void evtchn_handler(struct ex_regs *regs) { struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; - vi->evtchn_upcall_pending = 0; - vi->evtchn_pending_sel = 0; + + vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0); + vcpu_arch_put_guest(vi->evtchn_pending_sel, 0); guest_saw_irq = true; GUEST_SYNC(TEST_GUEST_SAW_IRQ); @@ -193,10 +191,7 @@ static void guest_code(void) struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; int i; - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); /* Trigger an interrupt injection */ GUEST_SYNC(TEST_INJECT_VECTOR); @@ -325,6 +320,10 @@ static void guest_code(void) GUEST_SYNC(TEST_POLL_WAKE); + /* Set the vcpu_info to point at exactly the place it already is to + * make sure the attribute is functional. */ + GUEST_SYNC(SET_VCPU_INFO); + /* A timer wake an *unmasked* port which should wake us with an * actual interrupt, while we're polling on a different port. */ ports[0]++; @@ -379,20 +378,7 @@ wait_for_timer: GUEST_SYNC(TEST_DONE); } -static int cmp_timespec(struct timespec *a, struct timespec *b) -{ - if (a->tv_sec > b->tv_sec) - return 1; - else if (a->tv_sec < b->tv_sec) - return -1; - else if (a->tv_nsec > b->tv_nsec) - return 1; - else if (a->tv_nsec < b->tv_nsec) - return -1; - else - return 0; -} - +static struct shared_info *shinfo; static struct vcpu_info *vinfo; static struct kvm_vcpu *vcpu; @@ -408,20 +394,38 @@ static void *juggle_shinfo_state(void *arg) { struct kvm_vm *vm = (struct kvm_vm *)arg; - struct kvm_xen_hvm_attr cache_activate = { + struct kvm_xen_hvm_attr cache_activate_gfn = { .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE }; - struct kvm_xen_hvm_attr cache_deactivate = { + struct kvm_xen_hvm_attr cache_deactivate_gfn = { .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, .u.shared_info.gfn = KVM_XEN_INVALID_GFN }; + struct kvm_xen_hvm_attr cache_activate_hva = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA, + .u.shared_info.hva = (unsigned long)shinfo + }; + + struct kvm_xen_hvm_attr cache_deactivate_hva = { + .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, + .u.shared_info.hva = 0 + }; + + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + for (;;) { - __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate); - __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate); + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn); pthread_testcancel(); + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn); + + if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) { + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva); + pthread_testcancel(); + __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva); + } } return NULL; @@ -429,7 +433,6 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { - struct timespec min_ts, max_ts, vm_ts; struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; @@ -446,8 +449,7 @@ int main(int argc, char *argv[]) bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); - - clock_gettime(CLOCK_REALTIME, &min_ts); + bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA); vm = vm_create_with_one_vcpu(&vcpu, guest_code); @@ -456,7 +458,7 @@ int main(int argc, char *argv[]) SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); - struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); + shinfo = addr_gpa2hva(vm, SHINFO_VADDR); int zero_fd = open("/dev/zero", O_RDONLY); TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); @@ -492,10 +494,16 @@ int main(int argc, char *argv[]) "Failed to read back RUNSTATE_UPDATE_FLAG attr"); } - struct kvm_xen_hvm_attr ha = { - .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, - .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, - }; + struct kvm_xen_hvm_attr ha = {}; + + if (has_shinfo_hva) { + ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA; + ha.u.shared_info.hva = (unsigned long)shinfo; + } else { + ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO; + ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE; + } + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); /* @@ -526,8 +534,6 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); if (do_runstate_tests) { @@ -541,15 +547,9 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; @@ -566,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; @@ -866,6 +859,16 @@ int main(int argc, char *argv[]) alarm(1); break; + case SET_VCPU_INFO: + if (has_shinfo_hva) { + struct kvm_xen_vcpu_attr vih = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA, + .u.hva = (unsigned long)vinfo + }; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih); + } + break; + case TEST_TIMER_PAST: TEST_ASSERT(!evtchn_irq_expected, "Expected event channel IRQ but it didn't happen"); @@ -973,7 +976,6 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); alarm(0); - clock_gettime(CLOCK_REALTIME, &max_ts); /* * Just a *really* basic check that things are being put in the @@ -982,6 +984,8 @@ int main(int argc, char *argv[]) */ struct pvclock_wall_clock *wc; struct pvclock_vcpu_time_info *ti, *ti2; + struct kvm_clock_data kcdata; + long long delta; wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); @@ -997,12 +1001,34 @@ int main(int argc, char *argv[]) ti2->tsc_shift, ti2->flags); } - vm_ts.tv_sec = wc->sec; - vm_ts.tv_nsec = wc->nsec; TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version); - TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); - TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); + + vm_ioctl(vm, KVM_GET_CLOCK, &kcdata); + + if (kcdata.flags & KVM_CLOCK_REALTIME) { + if (verbose) { + printf("KVM_GET_CLOCK clock: %lld.%09lld\n", + kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC); + printf("KVM_GET_CLOCK realtime: %lld.%09lld\n", + kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC); + } + + delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock); + + /* + * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but + * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s + * delta as testing clock accuracy is not the goal here. The test just needs to + * check that the value in shinfo is somewhat sane. + */ + TEST_ASSERT(llabs(delta) < NSEC_PER_SEC, + "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld", + wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC, + (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC); + } else { + pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n"); + } TEST_ASSERT(ti->version && !(ti->version & 1), "Bad time_info version %x", ti->version); diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c index c94cde3b523f..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c @@ -10,6 +10,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "hyperv.h" #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 @@ -108,16 +109,16 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); if (run->exit_reason == KVM_EXIT_XEN) { - ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); - ASSERT_EQ(run->xen.u.hcall.cpl, 0); - ASSERT_EQ(run->xen.u.hcall.longmode, 1); - ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); - ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); - ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); - ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); - ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); - ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); - ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); + TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); + TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0); + TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1); + TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); + TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); run->xen.u.hcall.result = RETVALUE; continue; } diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c index e0ddf47362e7..f331a4e9bae3 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c @@ -4,8 +4,6 @@ * * Tests for the IA32_XSS MSR. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "test_util.h" @@ -29,7 +27,7 @@ int main(int argc, char *argv[]) xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); TEST_ASSERT(xss_val == 0, - "MSR_IA32_XSS should be initialized to zero\n"); + "MSR_IA32_XSS should be initialized to zero"); vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c deleted file mode 100644 index 624dc725e14d..000000000000 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ /dev/null @@ -1,85 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * CR4 and CPUID sync test - * - * Copyright 2018, Red Hat, Inc. and/or its affiliates. - * - * Author: - * Wei Huang <wei@redhat.com> - */ - -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" - -#include "kvm_util.h" -#include "processor.h" - -static inline bool cr4_cpuid_is_sync(void) -{ - uint64_t cr4 = get_cr4(); - - return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE)); -} - -static void guest_code(void) -{ - uint64_t cr4; - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - - /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - /* notify hypervisor to change CR4 */ - GUEST_SYNC(0); - - /* check again */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct kvm_sregs sregs; - struct ucall uc; - - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - while (1) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c deleted file mode 100644 index d09b3cbcadc6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_GET_MSR_INDEX_LIST and - * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - kvm_get_feature_msr(feature_list->indices[i]); -} diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c deleted file mode 100644 index ce1ccc4c1503..000000000000 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * mmio_warning_test - * - * Copyright (C) 2019, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Test that we don't get a kernel warning when we call KVM_RUN after a - * triple fault occurs. To get the triple fault to occur we call KVM_RUN - * on a VCPU that hasn't been properly setup. - * - */ - -#define _GNU_SOURCE -#include <fcntl.h> -#include <kvm_util.h> -#include <linux/kvm.h> -#include <processor.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <test_util.h> -#include <unistd.h> - -#define NTHREAD 4 -#define NPROCESS 5 - -struct thread_context { - int kvmcpu; - struct kvm_run *run; -}; - -void *thr(void *arg) -{ - struct thread_context *tc = (struct thread_context *)arg; - int res; - int kvmcpu = tc->kvmcpu; - struct kvm_run *run = tc->run; - - res = ioctl(kvmcpu, KVM_RUN, 0); - pr_info("ret1=%d exit_reason=%d suberror=%d\n", - res, run->exit_reason, run->internal.suberror); - - return 0; -} - -void test(void) -{ - int i, kvm, kvmvm, kvmcpu; - pthread_t th[NTHREAD]; - struct kvm_run *run; - struct thread_context tc; - - kvm = open("/dev/kvm", O_RDWR); - TEST_ASSERT(kvm != -1, "failed to open /dev/kvm"); - kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL); - TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm)); - kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0); - TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu)); - run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, - kvmcpu, 0); - tc.kvmcpu = kvmcpu; - tc.run = run; - srand(getpid()); - for (i = 0; i < NTHREAD; i++) { - pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc); - usleep(rand() % 10000); - } - for (i = 0; i < NTHREAD; i++) - pthread_join(th[i], NULL); -} - -int get_warnings_count(void) -{ - int warnings; - FILE *f; - - f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - if (fscanf(f, "%d", &warnings) < 1) - warnings = 0; - pclose(f); - - return warnings; -} - -int main(void) -{ - int warnings_before, warnings_after; - - TEST_REQUIRE(host_cpu_is_intel); - - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); - - warnings_before = get_warnings_count(); - - for (int i = 0; i < NPROCESS; ++i) { - int status; - int pid = fork(); - - if (pid < 0) - exit(1); - if (pid == 0) { - test(); - exit(0); - } - while (waitpid(pid, &status, __WALL) != pid) - ; - } - - warnings_after = get_warnings_count(); - TEST_ASSERT(warnings_before == warnings_after, - "Warnings found in kernel. Run 'dmesg' to inspect them."); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c deleted file mode 100644 index 72812644d7f5..000000000000 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "kvm_util.h" -#include "processor.h" - -#define CPUID_MWAIT (1u << 3) - -enum monitor_mwait_testcases { - MWAIT_QUIRK_DISABLED = BIT(0), - MISC_ENABLES_QUIRK_DISABLED = BIT(1), - MWAIT_DISABLED = BIT(2), -}; - -static void guest_monitor_wait(int testcase) -{ - /* - * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, - * in all other scenarios KVM should emulate them as nops. - */ - bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) && - (testcase & MWAIT_DISABLED); - u8 vector; - - GUEST_SYNC(testcase); - - /* - * Arbitrarily MONITOR this function, SVM performs fault checks before - * intercept checks, so the inputs for MONITOR and MWAIT must be valid. - */ - vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); - if (fault_wanted) - GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); - else - GUEST_ASSERT_2(!vector, testcase, vector); - - vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); - if (fault_wanted) - GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); - else - GUEST_ASSERT_2(!vector, testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); - - GUEST_DONE(); -} - -int main(int argc, char *argv[]) -{ - uint64_t disabled_quirks; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - int testcase; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - - while (1) { - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld"); - goto done; - case UCALL_DONE: - goto done; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; - } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); - } - -done: - kvm_vm_free(vm); - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c deleted file mode 100644 index c9a07963d68a..000000000000 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test for x86 KVM_CAP_MSR_PLATFORM_INFO - * - * Copyright (C) 2018, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Verifies expected behavior of controlling guest access to - * MSR_PLATFORM_INFO. - */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 - -static void guest_code(void) -{ - uint64_t msr_platform_info; - - for (;;) { - msr_platform_info = rdmsr(MSR_PLATFORM_INFO); - GUEST_SYNC(msr_platform_info); - asm volatile ("inc %r11"); - } -} - -static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - get_ucall(vcpu, &uc); - TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd); - TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == - MSR_PLATFORM_INFO_MAX_TURBO_RATIO, - "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", - MSR_PLATFORM_INFO_MAX_TURBO_RATIO); -} - -static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu) -{ - vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t msr_platform_info; - - TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); - - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, - msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - test_msr_platform_info_enabled(vcpu); - test_msr_platform_info_disabled(vcpu); - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); - - kvm_vm_free(vm); - - return 0; -} |
