author     Linus Torvalds <torvalds@linux-foundation.org>   2023-05-01 12:06:20 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-05-01 12:06:20 -0700
commit     c8c655c34e33544aec9d64b660872ab33c29b5f1 (patch)
tree       4aad88f698f04cef9e5d9d573a6df6283085dadd /tools
parent     d75439d64a1e2b35e0f08906205b00279753cbed (diff)
parent     b3c98052d46948a8d65d2778c7f306ff38366aac (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
 "s390:
   - More phys_to_virt conversions
   - Improvement of AP management for VSIE (nested virtualization)

  ARM64:
   - Numerous fixes for the pathological lock inversion issue that plagued KVM/arm64 since... forever.
   - New framework allowing SMCCC-compliant hypercalls to be forwarded to userspace, hopefully paving the way for some more features being moved to VMMs rather than be implemented in the kernel.
   - Large rework of the timer code to allow a VM-wide offset to be applied to both virtual and physical counters, as well as a per-timer, per-vcpu offset that complements the global one. This last part allows the NV timer code to be implemented on top.
   - A small set of fixes to make sure that we don't change anything affecting the EL1&0 translation regime just after having taken an exception to EL2 until we have executed a DSB. This ensures that speculative walks started in EL1&0 have completed.
   - The usual selftest fixes and improvements.

  x86:
   - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled, and by giving the guest control of CR0.WP when EPT is enabled on VMX (VMX-only because SVM doesn't support per-bit controls)
   - Add CR0/CR4 helpers to query single bits, and clean up related code where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return as a bool
   - Move AMD_PSFD to cpufeatures.h and purge KVM's definition
   - Avoid unnecessary writes+flushes when the guest is only adding new PTEs
   - Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s optimizations when emulating invalidations
   - Clean up the range-based flushing APIs
   - Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a single A/D bit using a LOCK AND instead of XCHG, and skip all of the "handle changed SPTE" overhead associated with writing the entire entry
   - Track the number of "tail" entries in a pte_list_desc to avoid having to walk (potentially) all descriptors during insertion and deletion, which gets quite expensive if the guest is spamming fork()
   - Disallow virtualizing legacy LBRs if architectural LBRs are available; the two are mutually exclusive in hardware
   - Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES) after KVM_RUN, similar to CPUID features
   - Overhaul the vmx_pmu_caps selftest to better validate PERF_CAPABILITIES
   - Apply PMU filters to emulated events and add test coverage to the pmu_event_filter selftest
   - AMD SVM:
     - Add support for virtual NMIs
     - Fixes for edge cases related to virtual interrupts
   - Intel AMX:
     - Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if XTILE_DATA is not being reported due to userspace not opting in via prctl()
   - Fix a bug in emulation of ENCLS in compatibility mode
   - Allow emulation of NOP and PAUSE for L2
   - AMX selftests improvements
   - Misc cleanups

  MIPS:
   - Constify MIPS's internal callbacks (a leftover from the hardware enabling rework that landed in 6.3)

  Generic:
   - Drop unnecessary casts from "void *" throughout kvm_main.c
   - Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the struct size by 8 bytes on 64-bit kernels by utilizing a padding hole

  Documentation:
   - Fix goof introduced by the conversion to rST"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (211 commits)
  KVM: s390: pci: fix virtual-physical confusion on module unload/load
  KVM: s390: vsie: clarifications on setting the APCB
  KVM: s390: interrupt: fix virtual-physical confusion for next alert GISA
  KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state
  KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init()
  KVM: selftests: Test the PMU event "Instructions retired"
  KVM: selftests: Copy full counter values from guest in PMU event filter test
  KVM: selftests: Use error codes to signal errors in PMU event filter test
  KVM: selftests: Print detailed info in PMU event filter asserts
  KVM: selftests: Add helpers for PMC asserts in PMU event filter test
  KVM: selftests: Add a common helper for the PMU event filter guest code
  KVM: selftests: Fix spelling mistake "perrmited" -> "permitted"
  KVM: arm64: vhe: Drop extra isb() on guest exit
  KVM: arm64: vhe: Synchronise with page table walker on MMU update
  KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc()
  KVM: arm64: nvhe: Synchronise with page table walker on TLBI
  KVM: arm64: Handle 32bit CNTPCTSS traps
  KVM: arm64: nvhe: Synchronise with page table walker on vcpu run
  KVM: arm64: vgic: Don't acquire its_lock before config_lock
  KVM: selftests: Add test to verify KVM's supported XCR0
  ...
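
As a rough illustration of the SMCCC filter UAPI called out in the ARM64 section above (and exercised by the smccc_filter selftest added later in this diff), the sketch below shows how a VMM might ask KVM to forward PSCI_VERSION calls to userspace instead of handling them in-kernel. This is a minimal, untested sketch: it assumes the updated arm64 <linux/kvm.h> from this series (struct kvm_smccc_filter, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER, KVM_SMCCC_FILTER_FWD_TO_USER), an already-created VM file descriptor, and a made-up helper name; VM setup and error handling are elided.

/*
 * Sketch: route PSCI_VERSION through the SMCCC filter so the call is
 * forwarded to userspace rather than emulated by KVM.
 */
#include <linux/kvm.h>
#include <linux/psci.h>
#include <sys/ioctl.h>

static int forward_psci_version_to_user(int vm_fd)
{
	struct kvm_smccc_filter filter = {
		.base         = PSCI_0_2_FN_PSCI_VERSION,     /* first function ID in the range */
		.nr_functions = 1,                            /* length of the range, must be non-zero */
		.action       = KVM_SMCCC_FILTER_FWD_TO_USER, /* or KVM_SMCCC_FILTER_DENY */
	};
	struct kvm_device_attr attr = {
		.group = KVM_ARM_VM_SMCCC_CTRL,
		.attr  = KVM_ARM_VM_SMCCC_FILTER,
		.addr  = (__u64)(unsigned long)&filter,
	};

	/* VM-scoped device attribute; overlapping ranges are rejected with EEXIST. */
	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}

With FWD_TO_USER in place, a matching guest call exits to userspace with KVM_EXIT_HYPERCALL, run->hypercall.nr carrying the function ID and KVM_HYPERCALL_EXIT_SMC set in run->hypercall.flags when the guest used the SMC conduit, which is what the selftest's expect_call_fwd_to_user() asserts.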
Diffstat (limited to 'tools')
-rw-r--r--  tools/include/uapi/linux/kvm.h                                    |   2
-rw-r--r--  tools/testing/selftests/kvm/Makefile                              |   2
-rw-r--r--  tools/testing/selftests/kvm/aarch64/arch_timer.c                  |  56
-rw-r--r--  tools/testing/selftests/kvm/aarch64/get-reg-list.c                |  15
-rw-r--r--  tools/testing/selftests/kvm/aarch64/smccc_filter.c                | 268
-rw-r--r--  tools/testing/selftests/kvm/config                                |   1
-rw-r--r--  tools/testing/selftests/kvm/demand_paging_test.c                  |   2
-rw-r--r--  tools/testing/selftests/kvm/include/aarch64/processor.h           |  13
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util_base.h               |   1
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/processor.h            | 124
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/processor.c               |  91
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c                        |   5
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c                |  36
-rw-r--r--  tools/testing/selftests/kvm/x86_64/amx_test.c                     | 118
-rw-r--r--  tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c        | 253
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c  |   8
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c            | 231
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c              | 132
18 files changed, 1061 insertions, 297 deletions
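
The arch_timer.c hunks below wire a new -o option through the KVM_ARM_SET_COUNTER_OFFSET ioctl from the timer rework mentioned in the pull message. As a minimal sketch (not taken verbatim from the series) of how a VMM could apply the same VM-wide offset, assuming the updated <linux/kvm.h> definitions of struct kvm_arm_counter_offset and KVM_CAP_COUNTER_OFFSET and a hypothetical helper name:

/*
 * Sketch: apply a VM-wide counter offset at VM setup time (the selftest
 * does this right after creating the VM).
 */
#include <stdint.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_vm_counter_offset(int vm_fd, uint64_t offset)
{
	struct kvm_arm_counter_offset off = {
		.counter_offset = offset,	/* in counter cycles, applied to both virtual and physical counters */
		.reserved       = 0,		/* must be zero */
	};

	/* KVM_CHECK_EXTENSION is valid on the VM fd as well as /dev/kvm. */
	if (!ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COUNTER_OFFSET))
		return -1;	/* kernel lacks the global counter offset */

	return ioctl(vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &off);
}

The selftest below uses .reserved = 1 as an internal "no -o given" marker, only issues the ioctl once the user requests an offset, and fails the test if KVM_CAP_COUNTER_OFFSET is absent in that case.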
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index d77aef872a0a..4003a166328c 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1451,7 +1451,7 @@ struct kvm_vfio_spapr_tce {
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
+#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 84a627c43795..7a5ff646e7e7 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -105,6 +105,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
+TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
@@ -141,6 +142,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
TEST_GEN_PROGS_aarch64 += aarch64/psci_test
+TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 26556a266021..8ef370924a02 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -47,6 +47,7 @@ struct test_args {
int nr_iter;
int timer_period_ms;
int migration_freq_ms;
+ struct kvm_arm_counter_offset offset;
};
static struct test_args test_args = {
@@ -54,6 +55,7 @@ static struct test_args test_args = {
.nr_iter = NR_TEST_ITERS_DEF,
.timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
.migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+ .offset = { .reserved = 1 },
};
#define msecs_to_usecs(msec) ((msec) * 1000LL)
@@ -121,25 +123,35 @@ static void guest_validate_irq(unsigned int intid,
uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
unsigned long xctl = 0;
unsigned int timer_irq = 0;
+ unsigned int accessor;
- if (stage == GUEST_STAGE_VTIMER_CVAL ||
- stage == GUEST_STAGE_VTIMER_TVAL) {
- xctl = timer_get_ctl(VIRTUAL);
- timer_set_ctl(VIRTUAL, CTL_IMASK);
- xcnt = timer_get_cntct(VIRTUAL);
- cval = timer_get_cval(VIRTUAL);
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ switch (stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ case GUEST_STAGE_VTIMER_TVAL:
+ accessor = VIRTUAL;
timer_irq = vtimer_irq;
- } else if (stage == GUEST_STAGE_PTIMER_CVAL ||
- stage == GUEST_STAGE_PTIMER_TVAL) {
- xctl = timer_get_ctl(PHYSICAL);
- timer_set_ctl(PHYSICAL, CTL_IMASK);
- xcnt = timer_get_cntct(PHYSICAL);
- cval = timer_get_cval(PHYSICAL);
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ case GUEST_STAGE_PTIMER_TVAL:
+ accessor = PHYSICAL;
timer_irq = ptimer_irq;
- } else {
+ break;
+ default:
GUEST_ASSERT(0);
+ return;
}
+ xctl = timer_get_ctl(accessor);
+ if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+ return;
+
+ timer_set_ctl(accessor, CTL_IMASK);
+ xcnt = timer_get_cntct(accessor);
+ cval = timer_get_cval(accessor);
+
xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
/* Make sure we are dealing with the correct timer IRQ */
@@ -148,6 +160,8 @@ static void guest_validate_irq(unsigned int intid,
/* Basic 'timer condition met' check */
GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
}
static void guest_irq_handler(struct ex_regs *regs)
@@ -158,8 +172,6 @@ static void guest_irq_handler(struct ex_regs *regs)
guest_validate_irq(intid, shared_data);
- WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
-
gic_set_eoi(intid);
}
@@ -372,6 +384,13 @@ static struct kvm_vm *test_vm_create(void)
vm_init_descriptor_tables(vm);
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+ if (!test_args.offset.reserved) {
+ if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
+ vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
+ else
+ TEST_FAIL("no support for global offset\n");
+ }
+
for (i = 0; i < nr_vcpus; i++)
vcpu_init_descriptor_tables(vcpus[i]);
@@ -403,6 +422,7 @@ static void test_print_help(char *name)
TIMER_TEST_PERIOD_MS_DEF);
pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
TIMER_TEST_MIGRATION_FREQ_MS);
+ pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
pr_info("\t-h: print this help screen\n");
}
@@ -410,7 +430,7 @@ static bool parse_args(int argc, char *argv[])
{
int opt;
- while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) {
switch (opt) {
case 'n':
test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
@@ -429,6 +449,10 @@ static bool parse_args(int argc, char *argv[])
case 'm':
test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
break;
+ case 'o':
+ test_args.offset.counter_offset = strtol(optarg, NULL, 0);
+ test_args.offset.reserved = 0;
+ break;
case 'h':
default:
goto err;
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index d287dd2cac0a..d4e1f4af29d6 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -651,7 +651,7 @@ int main(int ac, char **av)
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
*
- * The blessed list is up to date with kernel version v5.13-rc3
+ * The blessed list is up to date with kernel version v6.4 (or so we hope)
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -807,10 +807,10 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 3, 7),
ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 2),
+ ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 3),
ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 4, 5),
+ ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 4, 6),
ARM64_SYS_REG(3, 0, 0, 4, 7),
ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
@@ -823,7 +823,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 5, 7),
ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
- ARM64_SYS_REG(3, 0, 0, 6, 2),
+ ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */
ARM64_SYS_REG(3, 0, 0, 6, 3),
ARM64_SYS_REG(3, 0, 0, 6, 4),
ARM64_SYS_REG(3, 0, 0, 6, 5),
@@ -832,8 +832,8 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3),
- ARM64_SYS_REG(3, 0, 0, 7, 4),
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 5),
ARM64_SYS_REG(3, 0, 0, 7, 6),
ARM64_SYS_REG(3, 0, 0, 7, 7),
@@ -858,6 +858,9 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
new file mode 100644
index 000000000000..f4ceae9c8925
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ * - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ * is prevented from filtering the architecture range of SMCCC calls.
+ * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+ HVC_INSN,
+ SMC_INSN,
+};
+
+#define for_each_conduit(conduit) \
+ for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+ struct arm_smccc_res res;
+
+ if (conduit == SMC_INSN)
+ smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+ else
+ smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ struct kvm_smccc_filter filter = {
+ .base = start,
+ .nr_functions = nr_functions,
+ .action = action,
+ };
+
+ return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+ TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+
+ /*
+ * Enable in-kernel emulation of PSCI to ensure that calls are denied
+ * due to the SMCCC filter, not because of KVM.
+ */
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ struct kvm_smccc_filter filter = {
+ .base = PSCI_0_2_FN_PSCI_VERSION,
+ .nr_functions = 1,
+ .action = KVM_SMCCC_FILTER_DENY,
+ .pad = { -1 },
+ };
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Setting filter with nonzero padding should return EINVAL");
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ uint32_t smc64_fn;
+ int r;
+
+ r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+ 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+ 0, 0);
+
+ r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to filter 0 functions should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to overflow filter range should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to use reserved filter action should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter already configured range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+
+ TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+ "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_denied(vcpu);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+ enum smccc_conduit conduit)
+{
+ struct kvm_run *run = vcpu->run;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+ "Unexpected exit reason: %u", run->exit_reason);
+ TEST_ASSERT(run->hypercall.nr == func_id,
+ "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+ if (conduit == SMC_INSN)
+ TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+ "KVM_HYPERCALL_EXIT_SMC is not set");
+ else
+ TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+ "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+ kvm_vm_free(vm);
+ return !r;
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_supports_smccc_filter());
+
+ test_pad_must_be_zero();
+ test_invalid_nr_functions();
+ test_overflow_nr_functions();
+ test_reserved_action();
+ test_filter_reserved_range();
+ test_filter_overlap();
+ test_filter_denied();
+ test_filter_fwd_to_user();
+}
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index d011b38e259e..8835fed09e9f 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -2,3 +2,4 @@ CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
CONFIG_USERFAULTFD=y
+CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index b0e1fc4de9e2..2439c4043fed 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -194,7 +194,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
ts_diff.tv_sec, ts_diff.tv_nsec);
pr_info("Overall demand paging rate: %f pgs/sec\n",
memstress_args.vcpu_args[0].pages * nr_vcpus /
- ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+ ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
memstress_destroy_vm(vm);
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 5f977528e09c..cb537253a6b9 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -214,6 +214,19 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res);
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+
+
uint32_t guest_get_vcpuid(void);
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index fbc2a79369b8..a089c356f354 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -213,6 +213,7 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
+bool get_kvm_param_bool(const char *param);
bool get_kvm_intel_param_bool(const char *param);
bool get_kvm_amd_param_bool(const char *param);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 90387ddcb2a9..aa434c8f19c5 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -48,6 +48,35 @@ extern bool host_cpu_is_amd;
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
+struct xstate_header {
+ u64 xstate_bv;
+ u64 xcomp_bv;
+ u64 reserved[6];
+} __attribute__((packed));
+
+struct xstate {
+ u8 i387[512];
+ struct xstate_header header;
+ u8 extended_state_area[0];
+} __attribute__ ((packed, aligned (64)));
+
+#define XFEATURE_MASK_FP BIT_ULL(0)
+#define XFEATURE_MASK_SSE BIT_ULL(1)
+#define XFEATURE_MASK_YMM BIT_ULL(2)
+#define XFEATURE_MASK_BNDREGS BIT_ULL(3)
+#define XFEATURE_MASK_BNDCSR BIT_ULL(4)
+#define XFEATURE_MASK_OPMASK BIT_ULL(5)
+#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
+#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
+#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
+#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
+
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \
+ XFEATURE_MASK_XTILE_CFG)
+
/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
enum cpuid_output_regs {
KVM_CPUID_EAX,
@@ -131,6 +160,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
/*
* Extended Leafs, a.k.a. AMD defined
@@ -211,10 +241,14 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31)
+
#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31)
#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31)
+#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31)
#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15)
#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15)
@@ -496,6 +530,24 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ __asm__ __volatile__("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax | ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
@@ -632,6 +684,15 @@ static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
!this_cpu_has(feature.anti_feature);
}
+static __always_inline uint64_t this_cpu_supported_xcr0(void)
+{
+ if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
typedef u32 __attribute__((vector_size(16))) sse128_t;
#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
@@ -928,14 +989,45 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
-static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index,
- uint64_t msr_value)
-{
- int r = _vcpu_set_msr(vcpu, msr_index, msr_value);
+/*
+ * Assert on an MSR access(es) and pretty print the MSR name when possible.
+ * Note, the caller provides the stringified name so that the name of macro is
+ * printed, not the value the macro resolves to (due to macro expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
+do { \
+ if (__builtin_constant_p(msr)) { \
+ TEST_ASSERT(cond, fmt, str, args); \
+ } else if (!(cond)) { \
+ char buf[16]; \
+ \
+ snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
+ TEST_ASSERT(cond, fmt, buf, args); \
+ } \
+} while (0)
- TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc. This is NOT an exhaustive list! The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
+{
+ return msr != MSR_IA32_TSC;
}
+#define vcpu_set_msr(vcpu, msr, val) \
+do { \
+ uint64_t r, v = val; \
+ \
+ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
+ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
+ if (!is_durable_msr(msr)) \
+ break; \
+ r = vcpu_get_msr(vcpu, msr); \
+ TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
@@ -1055,6 +1147,14 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
}
+static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+}
+
bool kvm_is_tdp_enabled(void);
uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
@@ -1066,10 +1166,10 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-void __vm_xsave_require_permission(int bit, const char *name);
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
-#define vm_xsave_require_permission(perm) \
- __vm_xsave_require_permission(perm, #perm)
+#define vm_xsave_require_permission(xfeature) \
+ __vm_xsave_require_permission(xfeature, #xfeature)
enum pg_level {
PG_LEVEL_NONE,
@@ -1106,14 +1206,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-#define XSTATE_XTILE_CFG_BIT 17
-#define XSTATE_XTILE_DATA_BIT 18
-
-#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
-#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
-#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \
- XSTATE_XTILE_DATA_MASK)
-
#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 5972a23b2765..3a0259e25335 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -58,10 +58,27 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
return (gva >> vm->page_shift) & mask;
}
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
{
- uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
- return entry & mask;
+ uint64_t pte;
+
+ pte = pa & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+ uint64_t pa;
+
+ pa = pte & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+
+ return pa;
}
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
@@ -110,18 +127,18 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -130,8 +147,7 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = paddr | 3;
- *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+ *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -226,7 +242,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
- uint64_t sctlr_el1, tcr_el1;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
if (!init)
init = &default_init;
@@ -277,10 +293,13 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
/* Configure output size */
switch (vm->mode) {
case VM_MODE_P52V48_64K:
tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
@@ -310,7 +329,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
}
@@ -508,29 +527,43 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
close(kvm_fd);
}
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res)
{
- asm volatile("mov w0, %w[function_id]\n"
- "mov x1, %[arg0]\n"
- "mov x2, %[arg1]\n"
- "mov x3, %[arg2]\n"
- "mov x4, %[arg3]\n"
- "mov x5, %[arg4]\n"
- "mov x6, %[arg5]\n"
- "mov x7, %[arg6]\n"
- "hvc #0\n"
- "mov %[res0], x0\n"
- "mov %[res1], x1\n"
- "mov %[res2], x2\n"
- "mov %[res3], x3\n"
- : [res0] "=r"(res->a0), [res1] "=r"(res->a1),
- [res2] "=r"(res->a2), [res3] "=r"(res->a3)
- : [function_id] "r"(function_id), [arg0] "r"(arg0),
- [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),
- [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)
- : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
}
void kvm_selftest_arch_init(void)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8ec20ac33de0..298c4372fb1a 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -80,6 +80,11 @@ static bool get_module_param_bool(const char *module_name, const char *param)
TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
+bool get_kvm_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm", param);
+}
+
bool get_kvm_intel_param_bool(const char *param)
{
return get_module_param_bool("kvm_intel", param);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index c39a4353ba19..d4a0b504b1e0 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -5,6 +5,7 @@
* Copyright (C) 2018, Google LLC.
*/
+#include "linux/bitmap.h"
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
@@ -573,6 +574,21 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
DEFAULT_GUEST_STACK_VADDR_MIN,
MEM_REGION_DATA);
+ stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+ /*
+ * Align stack to match calling sequence requirements in section "The
+ * Stack Frame" of the System V ABI AMD64 Architecture Processor
+ * Supplement, which requires the value (%rsp + 8) to be a multiple of
+ * 16 when control is transferred to the function entry point.
+ *
+ * If this code is ever used to launch a vCPU with 32-bit entry point it
+ * may need to subtract 4 bytes instead of 8 bytes.
+ */
+ TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+ "__vm_vaddr_alloc() did not provide a page-aligned address");
+ stack_vaddr -= 8;
+
vcpu = __vm_vcpu_add(vm, vcpu_id);
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
vcpu_setup(vm, vcpu);
@@ -580,7 +596,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
/* Setup guest general purpose registers */
vcpu_regs_get(vcpu, &regs);
regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
+ regs.rsp = stack_vaddr;
regs.rip = (unsigned long) guest_code;
vcpu_regs_set(vcpu, &regs);
@@ -681,7 +697,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
return buffer.entry.data;
}
-void __vm_xsave_require_permission(int bit, const char *name)
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
{
int kvm_fd;
u64 bitmask;
@@ -689,12 +705,15 @@ void __vm_xsave_require_permission(int bit, const char *name)
struct kvm_device_attr attr = {
.group = 0,
.attr = KVM_X86_XCOMP_GUEST_SUPP,
- .addr = (unsigned long) &bitmask
+ .addr = (unsigned long) &bitmask,
};
TEST_ASSERT(!kvm_supported_cpuid,
"kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
+ TEST_ASSERT(is_power_of_2(xfeature),
+ "Dynamic XFeatures must be enabled one at a time");
+
kvm_fd = open_kvm_dev_path_or_exit();
rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
close(kvm_fd);
@@ -704,16 +723,16 @@ void __vm_xsave_require_permission(int bit, const char *name)
TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
- __TEST_REQUIRE(bitmask & (1ULL << bit),
+ __TEST_REQUIRE(bitmask & xfeature,
"Required XSAVE feature '%s' not supported", name);
- TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));
+ TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
- TEST_ASSERT(bitmask & (1ULL << bit),
- "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
- bitmask);
+ TEST_ASSERT(bitmask & xfeature,
+ "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+ name, xfeature, bitmask);
}
void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
@@ -954,6 +973,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
vcpu_run_complete_io(vcpu);
state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+ TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
vcpu_events_get(vcpu, &state->events);
vcpu_mp_state_get(vcpu, &state->mp_state);
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index b646cdb5055a..11329e5ff945 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -30,21 +30,12 @@
#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
/* Tile configuration associated: */
+#define PALETTE_TABLE_INDEX 1
#define MAX_TILES 16
#define RESERVED_BYTES 14
-#define XFEATURE_XTILECFG 17
-#define XFEATURE_XTILEDATA 18
-#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
-#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
-#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
-
#define XSAVE_HDR_OFFSET 512
-struct xsave_data {
- u8 area[XSAVE_SIZE];
-} __aligned(64);
-
struct tile_config {
u8 palette_id;
u8 start_row;
@@ -68,24 +59,6 @@ struct xtile_info {
static struct xtile_info xtile;
-static inline u64 __xgetbv(u32 index)
-{
- u32 eax, edx;
-
- asm volatile("xgetbv;"
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((u64)edx << 32);
-}
-
-static inline void __xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
-}
-
static inline void __ldtilecfg(void *cfg)
{
asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
@@ -103,27 +76,16 @@ static inline void __tilerelease(void)
asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
}
-static inline void __xsavec(struct xsave_data *data, uint64_t rfbm)
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
{
uint32_t rfbm_lo = rfbm;
uint32_t rfbm_hi = rfbm >> 32;
asm volatile("xsavec (%%rdi)"
- : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi)
+ : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
: "memory");
}
-static inline void check_cpuid_xsave(void)
-{
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
- GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
-}
-
-static bool check_xsave_supports_xtile(void)
-{
- return __xgetbv(0) & XFEATURE_MASK_XTILE;
-}
-
static void check_xtile_info(void)
{
GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
@@ -135,6 +97,10 @@ static void check_xtile_info(void)
GUEST_ASSERT(xtile.xsave_size == 8192);
GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+ PALETTE_TABLE_INDEX);
+
GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
GUEST_ASSERT(xtile.max_names == 8);
@@ -158,37 +124,29 @@ static void set_tilecfg(struct tile_config *cfg)
}
}
-static void set_xstatebv(void *data, uint64_t bv)
-{
- *(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv;
-}
-
-static u64 get_xstatebv(void *data)
-{
- return *(u64 *)(data + XSAVE_HDR_OFFSET);
-}
-
static void init_regs(void)
{
uint64_t cr4, xcr0;
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
+
/* turn on CR4.OSXSAVE */
cr4 = get_cr4();
cr4 |= X86_CR4_OSXSAVE;
set_cr4(cr4);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
- xcr0 = __xgetbv(0);
+ xcr0 = xgetbv(0);
xcr0 |= XFEATURE_MASK_XTILE;
- __xsetbv(0x0, xcr0);
+ xsetbv(0x0, xcr0);
+ GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
}
static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
struct tile_data *tiledata,
- struct xsave_data *xsave_data)
+ struct xstate *xstate)
{
init_regs();
- check_cpuid_xsave();
- check_xsave_supports_xtile();
check_xtile_info();
GUEST_SYNC(1);
@@ -204,15 +162,29 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_SYNC(4);
__tilerelease();
GUEST_SYNC(5);
- /* bit 18 not in the XCOMP_BV after xsavec() */
- set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA);
- __xsavec(xsave_data, XFEATURE_MASK_XTILEDATA);
- GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0);
+ /*
+ * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+ * the xcomp_bv.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
/* xfd=0x40000, disable amx tiledata */
- wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA);
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /*
+ * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+ * remains the same even when amx tiledata is disabled by IA32_XFD.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
GUEST_SYNC(6);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
/* Trigger #NM exception */
@@ -224,11 +196,14 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
void guest_nm_handler(struct ex_regs *regs)
{
- /* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
GUEST_SYNC(7);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
GUEST_SYNC(8);
- GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
/* Clear xfd_err */
wrmsr(MSR_IA32_XFD_ERR, 0);
/* xfd=0, enable amx */
@@ -243,7 +218,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct kvm_x86_state *state;
int xsave_restore_size;
- vm_vaddr_t amx_cfg, tiledata, xsavedata;
+ vm_vaddr_t amx_cfg, tiledata, xstate;
struct ucall uc;
u32 amx_offset;
int stage, ret;
@@ -252,13 +227,14 @@ int main(int argc, char *argv[])
* Note, all off-by-default features must be enabled before anything
* caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
*/
- vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);
+ vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
@@ -282,10 +258,10 @@ int main(int argc, char *argv[])
tiledata = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
- /* xsave data for guest_code */
- xsavedata = vm_vaddr_alloc_pages(vm, 3);
- memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize());
- vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xsavedata);
+ /* XSAVE state for guest_code */
+ xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
for (stage = 1; ; stage++) {
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 2feef25ba691..40507ed9fe8a 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -54,6 +54,21 @@
#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
+
+/*
+ * "Retired instructions", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ * --- and ---
+ * "Instructions retired", from the Intel SDM, volume 3,
+ * "Pre-defined Architectural Performance Events."
+ */
+
+#define INST_RETIRED EVENT(0xc0, 0)
+
/*
* This event list comprises Intel's eight architectural events plus
* AMD's "retired branch instructions" for Zen[123] (and possibly
@@ -61,7 +76,7 @@
*/
static const uint64_t event_list[] = {
EVENT(0x3c, 0),
- EVENT(0xc0, 0),
+ INST_RETIRED,
EVENT(0x3c, 1),
EVENT(0x2e, 0x4f),
EVENT(0x2e, 0x41),
@@ -71,13 +86,21 @@ static const uint64_t event_list[] = {
AMD_ZEN_BR_RETIRED,
};
+struct {
+ uint64_t loads;
+ uint64_t stores;
+ uint64_t loads_stores;
+ uint64_t branches_retired;
+ uint64_t instructions_retired;
+} pmc_results;
+
/*
* If we encounter a #GP during the guest PMU sanity check, then the guest
* PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
*/
static void guest_gp_handler(struct ex_regs *regs)
{
- GUEST_SYNC(0);
+ GUEST_SYNC(-EFAULT);
}
/*
@@ -92,12 +115,23 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip)
wrmsr(msr, v);
if (rdmsr(msr) != v)
- GUEST_SYNC(0);
+ GUEST_SYNC(-EIO);
v ^= bits_to_flip;
wrmsr(msr, v);
if (rdmsr(msr) != v)
- GUEST_SYNC(0);
+ GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+ const uint64_t branches_retired = rdmsr(msr_base + 0);
+ const uint64_t insn_retired = rdmsr(msr_base + 1);
+
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+ pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+ pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
}
static void intel_guest_code(void)
@@ -105,19 +139,18 @@ static void intel_guest_code(void)
check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
check_msr(MSR_P6_EVNTSEL0, 0xffff);
check_msr(MSR_IA32_PMC0, 0xffff);
- GUEST_SYNC(1);
+ GUEST_SYNC(0);
for (;;) {
- uint64_t br0, br1;
-
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
- wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
- br0 = rdmsr(MSR_IA32_PMC0);
- __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
- br1 = rdmsr(MSR_IA32_PMC0);
- GUEST_SYNC(br1 - br0);
+ wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+ run_and_measure_loop(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
}
}
@@ -130,18 +163,17 @@ static void amd_guest_code(void)
{
check_msr(MSR_K7_EVNTSEL0, 0xffff);
check_msr(MSR_K7_PERFCTR0, 0xffff);
- GUEST_SYNC(1);
+ GUEST_SYNC(0);
for (;;) {
- uint64_t br0, br1;
-
wrmsr(MSR_K7_EVNTSEL0, 0);
wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
- br0 = rdmsr(MSR_K7_PERFCTR0);
- __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
- br1 = rdmsr(MSR_K7_PERFCTR0);
- GUEST_SYNC(br1 - br0);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+
+ run_and_measure_loop(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
}
}
@@ -161,6 +193,19 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
return uc.args[1];
}
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ memset(&pmc_results, 0, sizeof(pmc_results));
+ sync_global_to_guest(vcpu->vm, pmc_results);
+
+ r = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+ sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
/*
* In a nested environment or if the vPMU is disabled, the guest PMU
* might not work as architected (accessing the PMU MSRs may raise
@@ -171,13 +216,13 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
*/
static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
{
- bool success;
+ uint64_t r;
vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
- success = run_vcpu_to_sync(vcpu);
+ r = run_vcpu_to_sync(vcpu);
vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
- return success;
+ return !r;
}
static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
@@ -237,91 +282,101 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
return f;
}
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ if (br && br != NUM_BRANCHES) \
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
+ __func__, br, NUM_BRANCHES); \
+ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
+ __func__, br); \
+ TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
+ __func__, ir); \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
+ __func__, br); \
+ TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
+ __func__, ir); \
+} while (0)
+
static void test_without_filter(struct kvm_vcpu *vcpu)
{
- uint64_t count = run_vcpu_to_sync(vcpu);
+ run_vcpu_and_sync_pmc_results(vcpu);
- if (count != NUM_BRANCHES)
- pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- __func__, count, NUM_BRANCHES);
- TEST_ASSERT(count, "Allowed PMU event is not counting");
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
-static uint64_t test_with_filter(struct kvm_vcpu *vcpu,
- struct kvm_pmu_event_filter *f)
+static void test_with_filter(struct kvm_vcpu *vcpu,
+ struct kvm_pmu_event_filter *f)
{
vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
- return run_vcpu_to_sync(vcpu);
+ run_vcpu_and_sync_pmc_results(vcpu);
}
static void test_amd_deny_list(struct kvm_vcpu *vcpu)
{
uint64_t event = EVENT(0x1C2, 0);
struct kvm_pmu_event_filter *f;
- uint64_t count;
f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
- count = test_with_filter(vcpu, f);
-
+ test_with_filter(vcpu, f);
free(f);
- if (count != NUM_BRANCHES)
- pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- __func__, count, NUM_BRANCHES);
- TEST_ASSERT(count, "Allowed PMU event is not counting");
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_member_deny_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
- uint64_t count = test_with_filter(vcpu, f);
+ test_with_filter(vcpu, f);
free(f);
- if (count)
- pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
- __func__, count);
- TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}
static void test_member_allow_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
- uint64_t count = test_with_filter(vcpu, f);
+ test_with_filter(vcpu, f);
free(f);
- if (count != NUM_BRANCHES)
- pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- __func__, count, NUM_BRANCHES);
- TEST_ASSERT(count, "Allowed PMU event is not counting");
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
- uint64_t count;
+ remove_event(f, INST_RETIRED);
remove_event(f, INTEL_BR_RETIRED);
remove_event(f, AMD_ZEN_BR_RETIRED);
- count = test_with_filter(vcpu, f);
+ test_with_filter(vcpu, f);
free(f);
- if (count != NUM_BRANCHES)
- pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- __func__, count, NUM_BRANCHES);
- TEST_ASSERT(count, "Allowed PMU event is not counting");
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
- uint64_t count;
+ remove_event(f, INST_RETIRED);
remove_event(f, INTEL_BR_RETIRED);
remove_event(f, AMD_ZEN_BR_RETIRED);
- count = test_with_filter(vcpu, f);
+ test_with_filter(vcpu, f);
free(f);
- if (count)
- pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
- __func__, count);
- TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}
/*
@@ -450,51 +505,30 @@ static bool supports_event_mem_inst_retired(void)
#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
-struct perf_counter {
- union {
- uint64_t raw;
- struct {
- uint64_t loads:22;
- uint64_t stores:22;
- uint64_t loads_stores:20;
- };
- };
-};
-
-static uint64_t masked_events_guest_test(uint32_t msr_base)
+static void masked_events_guest_test(uint32_t msr_base)
{
- uint64_t ld0, ld1, st0, st1, ls0, ls1;
- struct perf_counter c;
- int val;
-
/*
- * The acutal value of the counters don't determine the outcome of
+ * The actual values of the counters don't determine the outcome of
* the test. Only that they are zero or non-zero.
*/
- ld0 = rdmsr(msr_base + 0);
- st0 = rdmsr(msr_base + 1);
- ls0 = rdmsr(msr_base + 2);
+ const uint64_t loads = rdmsr(msr_base + 0);
+ const uint64_t stores = rdmsr(msr_base + 1);
+ const uint64_t loads_stores = rdmsr(msr_base + 2);
+ int val;
+
__asm__ __volatile__("movl $0, %[v];"
"movl %[v], %%eax;"
"incl %[v];"
: [v]"+m"(val) :: "eax");
- ld1 = rdmsr(msr_base + 0);
- st1 = rdmsr(msr_base + 1);
- ls1 = rdmsr(msr_base + 2);
-
- c.loads = ld1 - ld0;
- c.stores = st1 - st0;
- c.loads_stores = ls1 - ls0;
-
- return c.raw;
+ pmc_results.loads = rdmsr(msr_base + 0) - loads;
+ pmc_results.stores = rdmsr(msr_base + 1) - stores;
+ pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
}
static void intel_masked_events_guest_code(void)
{
- uint64_t r;
-
for (;;) {
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -507,16 +541,13 @@ static void intel_masked_events_guest_code(void)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
- r = masked_events_guest_test(MSR_IA32_PMC0);
-
- GUEST_SYNC(r);
+ masked_events_guest_test(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
}
}
static void amd_masked_events_guest_code(void)
{
- uint64_t r;
-
for (;;) {
wrmsr(MSR_K7_EVNTSEL0, 0);
wrmsr(MSR_K7_EVNTSEL1, 0);
@@ -529,26 +560,22 @@ static void amd_masked_events_guest_code(void)
wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
- r = masked_events_guest_test(MSR_K7_PERFCTR0);
-
- GUEST_SYNC(r);
+ masked_events_guest_test(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
}
}
-static struct perf_counter run_masked_events_test(struct kvm_vcpu *vcpu,
- const uint64_t masked_events[],
- const int nmasked_events)
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+ const uint64_t masked_events[],
+ const int nmasked_events)
{
struct kvm_pmu_event_filter *f;
- struct perf_counter r;
f = create_pmu_event_filter(masked_events, nmasked_events,
KVM_PMU_EVENT_ALLOW,
KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
- r.raw = test_with_filter(vcpu, f);
+ test_with_filter(vcpu, f);
free(f);
-
- return r;
}
/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
@@ -673,7 +700,6 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
int nevents)
{
int ntests = ARRAY_SIZE(test_cases);
- struct perf_counter c;
int i, n;
for (i = 0; i < ntests; i++) {
@@ -685,13 +711,15 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
n = append_test_events(test, events, nevents);
- c = run_masked_events_test(vcpu, events, n);
- TEST_ASSERT(bool_eq(c.loads, test->flags & ALLOW_LOADS) &&
- bool_eq(c.stores, test->flags & ALLOW_STORES) &&
- bool_eq(c.loads_stores,
+ run_masked_events_test(vcpu, events, n);
+
+ TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+ bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+ bool_eq(pmc_results.loads_stores,
test->flags & ALLOW_LOADS_STORES),
- "%s loads: %u, stores: %u, loads + stores: %u",
- test->msg, c.loads, c.stores, c.loads_stores);
+ "%s loads: %lu, stores: %lu, loads + stores: %lu",
+ test->msg, pmc_results.loads, pmc_results.stores,
+ pmc_results.loads_stores);
}
}
@@ -764,6 +792,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu, *vcpu2 = NULL;
struct kvm_vm *vm;
+ TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
index d427eb146bc5..fa03c8d1ce4e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -126,12 +126,16 @@ static void stable_tsc_check_supported(void)
goto skip_test;
if (fgets(buf, sizeof(buf), fp) == NULL)
- goto skip_test;
+ goto close_fp;
if (strncmp(buf, "tsc", sizeof(buf)))
- goto skip_test;
+ goto close_fp;
+ fclose(fp);
return;
+
+close_fp:
+ fclose(fp);
skip_test:
print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
exit(KSFT_SKIP);
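This hunk only plugs a FILE handle leak: the early-skip paths now funnel through close_fp so fclose() runs before skipping. A minimal standalone illustration of the same cleanup pattern, with a placeholder sysfs path and a simplified string check rather than the selftest's exact code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define KSFT_SKIP 4	/* kselftest skip exit code */

/* Placeholder path: the current clocksource as exposed by sysfs. */
#define CLOCKSOURCE_PATH \
	"/sys/devices/system/clocksource/clocksource0/current_clocksource"

static void require_tsc_clocksource(void)
{
	char buf[16];
	FILE *fp = fopen(CLOCKSOURCE_PATH, "r");

	if (!fp)
		goto skip;

	if (!fgets(buf, sizeof(buf), fp) || strncmp(buf, "tsc", 3))
		goto close_fp;

	fclose(fp);		/* success: host clocksource is TSC */
	return;

close_fp:
	fclose(fp);		/* don't leak the handle on the skip paths */
skip:
	puts("Kernel does not use TSC clocksource - skipping");
	exit(KSFT_SKIP);
}

int main(void)
{
	require_tsc_clocksource();
	return 0;
}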
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index c280ba1e6572..4c90f76930f9 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -14,12 +14,11 @@
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
#include "kvm_util.h"
#include "vmx.h"
-#define PMU_CAP_FW_WRITES (1ULL << 13)
-#define PMU_CAP_LBR_FMT 0x3f
-
union perf_capabilities {
struct {
u64 lbr_format:6;
@@ -36,59 +35,221 @@ union perf_capabilities {
u64 capabilities;
};
-static void guest_code(void)
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+ .lbr_format = -1,
+ .pebs_trap = 1,
+ .pebs_arch_reg = 1,
+ .pebs_format = -1,
+ .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+ .lbr_format = -1,
+ .pebs_format = -1,
+};
+
+static void guest_code(uint64_t current_val)
{
- wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+ uint8_t vector;
+ int i;
+
+ vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
+ GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
+
+ vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
+ GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+
+ for (i = 0; i < 64; i++) {
+ vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
+ current_val ^ BIT_ULL(i));
+ GUEST_ASSERT_2(vector == GP_VECTOR,
+ current_val ^ BIT_ULL(i), vector);
+ }
+
+ GUEST_DONE();
}
-int main(int argc, char *argv[])
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
{
- struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- int ret;
- union perf_capabilities host_cap;
- uint64_t val;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ struct ucall uc;
+ int r, i;
- host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
- host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+ vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ vcpu_run(vcpu);
- TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
- TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
- /* testcase 1, set capabilities when we have PDCM bit */
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
- /* check capabilities can be retrieved with KVM_GET_MSR */
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- /* check whatever we write with KVM_SET_MSR is _not_ modified */
- vcpu_run(vcpu);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(i));
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
+ host_cap.capabilities ^ BIT_ULL(i));
+ }
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+static void test_basic_perf_capabilities(union perf_capabilities host_cap)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
- /* testcase 2, check valid LBR formats are accepted */
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ kvm_vm_free(vm);
+}
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
+{
+ const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ int bit;
+
+ for_each_set_bit(bit, &fungible_caps, 64) {
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities & ~BIT_ULL(bit));
+ }
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
+{
+ const uint64_t reserved_caps = (~host_cap.capabilities |
+ immutable_caps.capabilities) &
+ ~format_caps.capabilities;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ union perf_capabilities val = host_cap;
+ int r, bit;
+
+ for_each_set_bit(bit, &reserved_caps, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(bit));
+ TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ BIT_ULL(bit), bit);
+ }
/*
- * Testcase 3, check that an "invalid" LBR format is rejected. Only an
- * exact match of the host's format (and 0/disabled) is allowed.
+ * KVM only supports the host's native LBR format, as well as '0' (to
+ * disable LBR support). Verify KVM rejects all other LBR formats.
*/
- for (val = 1; val <= PMU_CAP_LBR_FMT; val++) {
- if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT))
+ for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ if (val.lbr_format == host_cap.lbr_format)
continue;
- ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val);
- TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val);
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ val.lbr_format, host_cap.lbr_format);
}
- printf("Completed perf capability tests.\n");
+ /* Ditto for the PEBS format. */
+ for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ if (val.pebs_format == host_cap.pebs_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ val.pebs_format, host_cap.pebs_format);
+ }
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int r;
+
+ if (!host_cap.lbr_format)
+ return;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+ vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+ r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+
kvm_vm_free(vm);
}
+
+int main(int argc, char *argv[])
+{
+ union perf_capabilities host_cap;
+
+ TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+ TEST_ASSERT(host_cap.full_width_write,
+ "Full-width writes should always be supported");
+
+ test_basic_perf_capabilities(host_cap);
+ test_fungible_perf_capabilities(host_cap);
+ test_immutable_perf_capabilities(host_cap);
+ test_guest_wrmsr_perf_capabilities(host_cap);
+ test_lbr_perf_capabilities(host_cap);
+}
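The new tests hinge on the mask arithmetic above: host-supported bits outside the immutable set are "fungible" and must be toggleable one at a time, while bits that are unsupported or immutable (excluding the multi-bit LBR/PEBS format fields, which get their own loops) must be rejected. A small host-side sketch with purely hypothetical mask values, just to show how the sets are derived:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical values for illustration only; the real masks come from
	 * the perf_capabilities union and the host's PERF_CAPABILITIES MSR. */
	const uint64_t host_caps      = 0x0000000000002005ULL;
	const uint64_t immutable_caps = 0x0000000000001fffULL;
	const uint64_t format_caps    = 0x0000000000000f3fULL;

	const uint64_t fungible = host_caps & ~immutable_caps;
	const uint64_t reserved = (~host_caps | immutable_caps) & ~format_caps;

	printf("fungible: 0x%016llx\n", (unsigned long long)fungible);
	printf("reserved: 0x%016llx\n", (unsigned long long)reserved);
	return 0;
}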
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
new file mode 100644
index 000000000000..905bd5ae4431
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
+do { \
+ uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ \
+ GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) || \
+ __supported == ((xfeatures) | (dependencies)), \
+ __supported, (xfeatures), (dependencies)); \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently. E.g. a CPU can technically enumerate support for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
+do { \
+ uint64_t __supported = (supported_xcr0) & (xfeatures); \
+ \
+ GUEST_ASSERT_2(!__supported || __supported == (xfeatures), \
+ __supported, (xfeatures)); \
+} while (0)
+
+static void guest_code(void)
+{
+ uint64_t xcr0_reset;
+ uint64_t supported_xcr0;
+ int i, vector;
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+ xcr0_reset = xgetbv(0);
+ supported_xcr0 = this_cpu_supported_xcr0();
+
+ GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP);
+
+ /* Check AVX */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_YMM,
+ XFEATURE_MASK_SSE);
+
+ /* Check MPX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ /* Check AVX-512 */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_AVX512,
+ XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_AVX512);
+
+ /* Check AMX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_XTILE);
+
+ vector = xsetbv_safe(0, supported_xcr0);
+ GUEST_ASSERT_2(!vector, supported_xcr0, vector);
+
+ for (i = 0; i < 64; i++) {
+ if (supported_xcr0 & BIT_ULL(i))
+ continue;
+
+ vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+ GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i));
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx");
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
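The two assertion macros in this new test encode "if a feature's XCR0 bits are all enumerated, its prerequisites must be too" and "bundled XFeatures are all-or-nothing". A standalone arithmetic sketch of the dependency check, using illustrative bit positions (FP=0, SSE=1, YMM=2) rather than the selftest's XFEATURE_MASK_* symbols:

#include <assert.h>
#include <stdint.h>

#define XF_FP	(1ULL << 0)
#define XF_SSE	(1ULL << 1)
#define XF_YMM	(1ULL << 2)

/* Mirrors ASSERT_XFEATURE_DEPENDENCIES: either xfeatures isn't fully
 * supported, or xfeatures plus its dependencies are all supported. */
static int deps_ok(uint64_t supported, uint64_t xfeatures, uint64_t deps)
{
	uint64_t s = supported & (xfeatures | deps);

	return (s & xfeatures) != xfeatures || s == (xfeatures | deps);
}

int main(void)
{
	assert(deps_ok(XF_FP | XF_SSE | XF_YMM, XF_YMM, XF_SSE));	/* AVX with SSE: OK */
	assert(!deps_ok(XF_FP | XF_YMM, XF_YMM, XF_SSE));		/* AVX without SSE: trips */
	return 0;
}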