Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini: "ARM: - Proper emulation of the OSLock feature of the debug architecture - Scalibility improvements for the MMU lock when dirty logging is on - New VMID allocator, which will eventually help with SVA in VMs - Better support for PMUs in heterogenous systems - PSCI 1.1 support, enabling support for SYSTEM_RESET2 - Implement CONFIG_DEBUG_LIST at EL2 - Make CONFIG_ARM64_ERRATUM_2077057 default y - Reduce the overhead of VM exit when no interrupt is pending - Remove traces of 32bit ARM host support from the documentation - Updated vgic selftests - Various cleanups, doc updates and spelling fixes RISC-V: - Prevent KVM_COMPAT from being selected - Optimize __kvm_riscv_switch_to() implementation - RISC-V SBI v0.3 support s390: - memop selftest - fix SCK locking - adapter interruptions virtualization for secure guests - add Claudio Imbrenda as maintainer - first step to do proper storage key checking x86: - Continue switching kvm_x86_ops to static_call(); introduce static_call_cond() and __static_call_ret0 when applicable. - Cleanup unused arguments in several functions - Synthesize AMD 0x80000021 leaf - Fixes and optimization for Hyper-V sparse-bank hypercalls - Implement Hyper-V's enlightened MSR bitmap for nested SVM - Remove MMU auditing - Eager splitting of page tables (new aka "TDP" MMU only) when dirty page tracking is enabled - Cleanup the implementation of the guest PGD cache - Preparation for the implementation of Intel IPI virtualization - Fix some segment descriptor checks in the emulator - Allow AMD AVIC support on systems with physical APIC ID above 255 - Better API to disable virtualization quirks - Fixes and optimizations for the zapping of page tables: - Zap roots in two passes, avoiding RCU read-side critical sections that last too long for very large guests backed by 4 KiB SPTEs. - Zap invalid and defunct roots asynchronously via concurrency-managed work queue. - Allowing yielding when zapping TDP MMU roots in response to the root's last reference being put. - Batch more TLB flushes with an RCU trick. Whoever frees the paging structure now holds RCU as a proxy for all vCPUs running in the guest, i.e. to prolongs the grace period on their behalf. It then kicks the the vCPUs out of guest mode before doing rcu_read_unlock(). Generic: - Introduce __vcalloc and use it for very large allocations that need memcg accounting" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (246 commits) KVM: use kvcalloc for array allocations KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2 kvm: x86: Require const tsc for RT KVM: x86: synthesize CPUID leaf 0x80000021h if useful KVM: x86: add support for CPUID leaf 0x80000021 KVM: x86: do not use KVM_X86_OP_OPTIONAL_RET0 for get_mt_mask Revert "KVM: x86/mmu: Zap only TDP MMU leafs in kvm_zap_gfn_range()" kvm: x86/mmu: Flush TLB before zap_gfn_range releases RCU KVM: arm64: fix typos in comments KVM: arm64: Generalise VM features into a set of flags KVM: s390: selftests: Add error memop tests KVM: s390: selftests: Add more copy memop tests KVM: s390: selftests: Add named stages for memop test KVM: s390: selftests: Add macro as abstraction for MEM_OP KVM: s390: selftests: Split memop tests KVM: s390x: fix SCK locking RISC-V: KVM: Implement SBI HSM suspend call RISC-V: KVM: Add common kvm_riscv_vcpu_wfi() function RISC-V: Add SBI HSM suspend related defines RISC-V: KVM: Implement SBI v0.3 SRST extension ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2022-03-24 11:58:57 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2022-03-24 11:58:57 -0700
commit: 1ebdbeb03efe89f01f15df038a589077df3d21f5 (patch)
tree: 06b6b7bb565668d136c060c5104481e48cbf71e2 /arch/arm64/kvm/arm.c
parent: efee6c79298fd823c569d501d041de85caa102a6 (diff)
parent: c9b8fecddb5bb4b67e351bbaeaa648a6f7456912 (diff)
1 files changed, 47 insertions, 95 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 946e401ef6b0..523bc934fe2f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -53,11 +53,6 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
-/* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u32 kvm_next_vmid;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
-
 static bool vgic_present;
 
 static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
@@ -89,7 +84,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 	switch (cap->cap) {
 	case KVM_CAP_ARM_NISV_TO_USER:
 		r = 0;
-		kvm->arch.return_nisv_io_abort_to_user = true;
+		set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
+			&kvm->arch.flags);
 		break;
 	case KVM_CAP_ARM_MTE:
 		mutex_lock(&kvm->lock);
@@ -97,7 +93,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 			r = -EINVAL;
 		} else {
 			r = 0;
-			kvm->arch.mte_enabled = true;
+			set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
 		}
 		mutex_unlock(&kvm->lock);
 		break;
@@ -150,6 +146,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
+		goto out_free_stage2_pgd;
+	cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
+
 	kvm_vgic_early_init(kvm);
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
@@ -176,6 +176,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	bitmap_free(kvm->arch.pmu_filter);
+	free_cpumask_var(kvm->arch.supported_cpus);
 
 	kvm_vgic_destroy(kvm);
 
@@ -411,6 +412,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (vcpu_has_ptrauth(vcpu))
 		vcpu_ptrauth_disable(vcpu);
 	kvm_arch_vcpu_load_debug_state_flags(vcpu);
+
+	if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+		vcpu_set_on_unsupported_cpu(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -422,7 +426,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	kvm_timer_vcpu_put(vcpu);
 	kvm_vgic_put(vcpu);
 	kvm_vcpu_pmu_restore_host(vcpu);
+	kvm_arm_vmid_clear_active();
 
+	vcpu_clear_on_unsupported_cpu(vcpu);
 	vcpu->cpu = -1;
 }
 
@@ -489,87 +495,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
 }
 #endif
 
-/* Just ensure a guest exit from a particular CPU */
-static void exit_vm_noop(void *info)
-{
-}
-
-void force_vm_exit(const cpumask_t *mask)
-{
-	preempt_disable();
-	smp_call_function_many(mask, exit_vm_noop, NULL, true);
-	preempt_enable();
-}
-
-/**
- * need_new_vmid_gen - check that the VMID is still valid
- * @vmid: The VMID to check
- *
- * return true if there is a new generation of VMIDs being used
- *
- * The hardware supports a limited set of values with the value zero reserved
- * for the host, so we check if an assigned value belongs to a previous
- * generation, which requires us to assign a new value. If we're the first to
- * use a VMID for the new generation, we must flush necessary caches and TLBs
- * on all CPUs.
- */
-static bool need_new_vmid_gen(struct kvm_vmid *vmid)
-{
-	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
-	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
-	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
-}
-
-/**
- * update_vmid - Update the vmid with a valid VMID for the current generation
- * @vmid: The stage-2 VMID information struct
- */
-static void update_vmid(struct kvm_vmid *vmid)
-{
-	if (!need_new_vmid_gen(vmid))
-		return;
-
-	spin_lock(&kvm_vmid_lock);
-
-	/*
-	 * We need to re-check the vmid_gen here to ensure that if another vcpu
-	 * already allocated a valid vmid for this vm, then this vcpu should
-	 * use the same vmid.
-	 */
-	if (!need_new_vmid_gen(vmid)) {
-		spin_unlock(&kvm_vmid_lock);
-		return;
-	}
-
-	/* First user of a new VMID generation? */
-	if (unlikely(kvm_next_vmid == 0)) {
-		atomic64_inc(&kvm_vmid_gen);
-		kvm_next_vmid = 1;
-
-		/*
-		 * On SMP we know no other CPUs can use this CPU's or each
-		 * other's VMID after force_vm_exit returns since the
-		 * kvm_vmid_lock blocks them from reentry to the guest.
-		 */
-		force_vm_exit(cpu_all_mask);
-		/*
-		 * Now broadcast TLB + ICACHE invalidation over the inner
-		 * shareable domain to make sure all data structures are
-		 * clean.
-		 */
-		kvm_call_hyp(__kvm_flush_vm_context);
-	}
-
-	WRITE_ONCE(vmid->vmid, kvm_next_vmid);
-	kvm_next_vmid++;
-	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
-
-	smp_wmb();
-	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
-
-	spin_unlock(&kvm_vmid_lock);
-}
-
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.target >= 0;
@@ -634,6 +559,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (kvm_vm_is_protected(kvm))
 		kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
 
+	mutex_lock(&kvm->lock);
+	set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
+	mutex_unlock(&kvm->lock);
+
 	return ret;
 }
 
@@ -792,8 +721,15 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
 		}
 	}
 
+	if (unlikely(vcpu_on_unsupported_cpu(vcpu))) {
+		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED;
+		run->fail_entry.cpu = smp_processor_id();
+		*ret = 0;
+		return true;
+	}
+
 	return kvm_request_pending(vcpu) ||
-			need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
 			xfer_to_guest_mode_work_pending();
 }
 
@@ -855,8 +791,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		if (!ret)
 			ret = 1;
 
-		update_vmid(&vcpu->arch.hw_mmu->vmid);
-
 		check_vcpu_requests(vcpu);
 
 		/*
@@ -866,6 +800,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		preempt_disable();
 
+		/*
+		 * The VMID allocator only tracks active VMIDs per
+		 * physical CPU, and therefore the VMID allocated may not be
+		 * preserved on VMID roll-over if the task was preempted,
+		 * making a thread's VMID inactive. So we need to call
+		 * kvm_arm_vmid_update() in non-premptible context.
+		 */
+		kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);
+
 		kvm_pmu_flush_hwstate(vcpu);
 
 		local_irq_disable();
@@ -945,9 +888,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 * context synchronization event) is necessary to ensure that
 		 * pending interrupts are taken.
 		 */
-		local_irq_enable();
-		isb();
-		local_irq_disable();
+		if (ARM_EXCEPTION_CODE(ret) == ARM_EXCEPTION_IRQ) {
+			local_irq_enable();
+			isb();
+			local_irq_disable();
+		}
 
 		guest_timing_exit_irqoff();
 
@@ -1742,7 +1687,7 @@ static void init_cpu_logical_map(void)
 
 	/*
 	 * Copy the MPIDR <-> logical CPU ID mapping to hyp.
-	 * Only copy the set of online CPUs whose features have been chacked
+	 * Only copy the set of online CPUs whose features have been checked
 	 * against the finalized system capabilities. The hypervisor will not
 	 * allow any other CPUs from the `possible` set to boot.
 	 */
@@ -2159,6 +2104,12 @@ int kvm_arch_init(void *opaque)
 	if (err)
 		return err;
 
+	err = kvm_arm_vmid_alloc_init();
+	if (err) {
+		kvm_err("Failed to initialize VMID allocator.\n");
+		return err;
+	}
+
 	if (!in_hyp_mode) {
 		err = init_hyp_mode();
 		if (err)
@@ -2198,6 +2149,7 @@ out_hyp:
 	if (!in_hyp_mode)
 		teardown_hyp_mode();
 out_err:
+	kvm_arm_vmid_alloc_free();
 	return err;
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2022-03-24 11:58:57 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-03-24 11:58:57 -0700
commit	1ebdbeb03efe89f01f15df038a589077df3d21f5 (patch)
tree	06b6b7bb565668d136c060c5104481e48cbf71e2 /arch/arm64/kvm/arm.c
parent	efee6c79298fd823c569d501d041de85caa102a6 (diff)
parent	c9b8fecddb5bb4b67e351bbaeaa648a6f7456912 (diff)