diff options
| -rw-r--r-- | Documentation/virt/kvm/locking.rst | 9 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 174 | 
2 files changed, 66 insertions, 117 deletions
| diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index c0cb5ce51c1e..be3c323888b1 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -9,7 +9,9 @@ KVM Lock Overview  The acquisition orders for mutexes are as follows: -- cpus_read_lock() is taken outside kvm_lock and kvm_usage_lock +- cpus_read_lock() is taken outside kvm_lock + +- kvm_usage_lock is taken outside cpus_read_lock()  - kvm->lock is taken outside vcpu->mutex @@ -241,9 +243,8 @@ time it will be set using the Dirty tracking mechanism described above.  :Arch:		any  :Protects:	- kvm_usage_count  		- hardware virtualization enable/disable -:Comment:	Exists because using kvm_lock leads to deadlock (see earlier comment -		on cpus_read_lock() vs kvm_lock).  Note, KVM also disables CPU hotplug via -		cpus_read_lock() when enabling/disabling virtualization. +:Comment:	Exists to allow taking cpus_read_lock() while kvm_usage_count is +		protected, which simplifies the virtualization enabling logic.  ``kvm->mn_invalidate_lock``  ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7164a9ece208..9ad8903b575a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5578,7 +5578,7 @@ static DEFINE_PER_CPU(bool, hardware_enabled);  static DEFINE_MUTEX(kvm_usage_lock);  static int kvm_usage_count; -static int __hardware_enable_nolock(void) +static int hardware_enable_nolock(void)  {  	if (__this_cpu_read(hardware_enabled))  		return 0; @@ -5593,34 +5593,18 @@ static int __hardware_enable_nolock(void)  	return 0;  } -static void hardware_enable_nolock(void *failed) -{ -	if (__hardware_enable_nolock()) -		atomic_inc(failed); -} -  static int kvm_online_cpu(unsigned int cpu)  { -	int ret = 0; -  	/*  	 * Abort the CPU online process if hardware virtualization cannot  	 * be enabled. Otherwise running VMs would encounter unrecoverable  	 * errors when scheduled to this CPU.  	 */ -	mutex_lock(&kvm_usage_lock); -	if (kvm_usage_count) -		ret = __hardware_enable_nolock(); -	mutex_unlock(&kvm_usage_lock); -	return ret; +	return hardware_enable_nolock();  }  static void hardware_disable_nolock(void *junk)  { -	/* -	 * Note, hardware_disable_all_nolock() tells all online CPUs to disable -	 * hardware, not just CPUs that successfully enabled hardware! -	 */  	if (!__this_cpu_read(hardware_enabled))  		return; @@ -5631,78 +5615,10 @@ static void hardware_disable_nolock(void *junk)  static int kvm_offline_cpu(unsigned int cpu)  { -	mutex_lock(&kvm_usage_lock); -	if (kvm_usage_count) -		hardware_disable_nolock(NULL); -	mutex_unlock(&kvm_usage_lock); +	hardware_disable_nolock(NULL);  	return 0;  } -static void hardware_disable_all_nolock(void) -{ -	BUG_ON(!kvm_usage_count); - -	kvm_usage_count--; -	if (!kvm_usage_count) -		on_each_cpu(hardware_disable_nolock, NULL, 1); -} - -static void hardware_disable_all(void) -{ -	cpus_read_lock(); -	mutex_lock(&kvm_usage_lock); -	hardware_disable_all_nolock(); -	mutex_unlock(&kvm_usage_lock); -	cpus_read_unlock(); -} - -static int hardware_enable_all(void) -{ -	atomic_t failed = ATOMIC_INIT(0); -	int r; - -	/* -	 * Do not enable hardware virtualization if the system is going down. -	 * If userspace initiated a forced reboot, e.g. reboot -f, then it's -	 * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling -	 * after kvm_reboot() is called.  Note, this relies on system_state -	 * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops -	 * hook instead of registering a dedicated reboot notifier (the latter -	 * runs before system_state is updated). -	 */ -	if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || -	    system_state == SYSTEM_RESTART) -		return -EBUSY; - -	/* -	 * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() -	 * is called, and so on_each_cpu() between them includes the CPU that -	 * is being onlined.  As a result, hardware_enable_nolock() may get -	 * invoked before kvm_online_cpu(), which also enables hardware if the -	 * usage count is non-zero.  Disable CPU hotplug to avoid attempting to -	 * enable hardware multiple times. -	 */ -	cpus_read_lock(); -	mutex_lock(&kvm_usage_lock); - -	r = 0; - -	kvm_usage_count++; -	if (kvm_usage_count == 1) { -		on_each_cpu(hardware_enable_nolock, &failed, 1); - -		if (atomic_read(&failed)) { -			hardware_disable_all_nolock(); -			r = -EBUSY; -		} -	} - -	mutex_unlock(&kvm_usage_lock); -	cpus_read_unlock(); - -	return r; -} -  static void kvm_shutdown(void)  {  	/* @@ -5734,8 +5650,7 @@ static int kvm_suspend(void)  	lockdep_assert_not_held(&kvm_usage_lock);  	lockdep_assert_irqs_disabled(); -	if (kvm_usage_count) -		hardware_disable_nolock(NULL); +	hardware_disable_nolock(NULL);  	return 0;  } @@ -5744,8 +5659,7 @@ static void kvm_resume(void)  	lockdep_assert_not_held(&kvm_usage_lock);  	lockdep_assert_irqs_disabled(); -	if (kvm_usage_count) -		WARN_ON_ONCE(__hardware_enable_nolock()); +	WARN_ON_ONCE(hardware_enable_nolock());  }  static struct syscore_ops kvm_syscore_ops = { @@ -5753,6 +5667,60 @@ static struct syscore_ops kvm_syscore_ops = {  	.resume = kvm_resume,  	.shutdown = kvm_shutdown,  }; + +static int hardware_enable_all(void) +{ +	int r; + +	guard(mutex)(&kvm_usage_lock); + +	if (kvm_usage_count++) +		return 0; + +	r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", +			      kvm_online_cpu, kvm_offline_cpu); +	if (r) +		goto err_cpuhp; + +	register_syscore_ops(&kvm_syscore_ops); + +	/* +	 * Undo virtualization enabling and bail if the system is going down. +	 * If userspace initiated a forced reboot, e.g. reboot -f, then it's +	 * possible for an in-flight operation to enable virtualization after +	 * syscore_shutdown() is called, i.e. without kvm_shutdown() being +	 * invoked.  Note, this relies on system_state being set _before_ +	 * kvm_shutdown(), e.g. to ensure either kvm_shutdown() is invoked +	 * or this CPU observes the impending shutdown.  Which is why KVM uses +	 * a syscore ops hook instead of registering a dedicated reboot +	 * notifier (the latter runs before system_state is updated). +	 */ +	if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || +	    system_state == SYSTEM_RESTART) { +		r = -EBUSY; +		goto err_rebooting; +	} + +	return 0; + +err_rebooting: +	unregister_syscore_ops(&kvm_syscore_ops); +	cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); +err_cpuhp: +	--kvm_usage_count; +	return r; +} + +static void hardware_disable_all(void) +{ +	guard(mutex)(&kvm_usage_lock); + +	if (--kvm_usage_count) +		return; + +	unregister_syscore_ops(&kvm_syscore_ops); +	cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); +}  #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */  static int hardware_enable_all(void)  { @@ -6461,15 +6429,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)  	int r;  	int cpu; -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -	r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", -				      kvm_online_cpu, kvm_offline_cpu); -	if (r) -		return r; - -	register_syscore_ops(&kvm_syscore_ops); -#endif -  	/* A kmem cache lets us meet the alignment requirements of fx_save. */  	if (!vcpu_align)  		vcpu_align = __alignof__(struct kvm_vcpu); @@ -6480,10 +6439,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)  					   offsetofend(struct kvm_vcpu, stats_id)  					   - offsetof(struct kvm_vcpu, arch),  					   NULL); -	if (!kvm_vcpu_cache) { -		r = -ENOMEM; -		goto err_vcpu_cache; -	} +	if (!kvm_vcpu_cache) +		return -ENOMEM;  	for_each_possible_cpu(cpu) {  		if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), @@ -6540,11 +6497,6 @@ err_cpu_kick_mask:  	for_each_possible_cpu(cpu)  		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));  	kmem_cache_destroy(kvm_vcpu_cache); -err_vcpu_cache: -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -	unregister_syscore_ops(&kvm_syscore_ops); -	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); -#endif  	return r;  }  EXPORT_SYMBOL_GPL(kvm_init); @@ -6566,10 +6518,6 @@ void kvm_exit(void)  	kmem_cache_destroy(kvm_vcpu_cache);  	kvm_vfio_ops_exit();  	kvm_async_pf_deinit(); -#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -	unregister_syscore_ops(&kvm_syscore_ops); -	cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); -#endif  	kvm_irqfd_exit();  }  EXPORT_SYMBOL_GPL(kvm_exit); | 
