From 0f062bfe36b63cd24f16afe2822d0df7c27904d9 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 28 Feb 2019 18:33:00 +0000 Subject: KVM: arm/arm64: Add hook for arch-specific KVM initialisation This patch adds a kvm_arm_init_arch_resources() hook to perform subarch-specific initialisation when starting up KVM. This will be used in a subsequent patch for global SVE-related setup on arm64. No functional change. Signed-off-by: Dave Martin Reviewed-by: Julien Thierry Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..c69e1370a5dc 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1664,6 +1664,10 @@ int kvm_arch_init(void *opaque) if (err) return err; + err = kvm_arm_init_arch_resources(); + if (err) + return err; + if (!in_hyp_mode) { err = init_hyp_mode(); if (err) -- cgit From 7dd32a0d0103a5941efbb971f85a3e930cc5665e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 19 Dec 2018 14:27:01 +0000 Subject: KVM: arm/arm64: Add KVM_ARM_VCPU_FINALIZE ioctl Some aspects of vcpu configuration may be too complex to be completed inside KVM_ARM_VCPU_INIT. Thus, there may be a requirement for userspace to do some additional configuration before various other ioctls will work in a consistent way. In particular this will be the case for SVE, where userspace will need to negotiate the set of vector lengths to be made available to the guest before the vcpu becomes fully usable. In order to provide an explicit way for userspace to confirm that it has finished setting up a particular vcpu feature, this patch adds a new ioctl KVM_ARM_VCPU_FINALIZE. When userspace has opted into a feature that requires finalization, typically by means of a feature flag passed to KVM_ARM_VCPU_INIT, a matching call to KVM_ARM_VCPU_FINALIZE is now required before KVM_RUN or KVM_GET_REG_LIST is allowed. 
Individual features may impose additional restrictions where appropriate. No existing vcpu features are affected by this, so current userspace implementations will continue to work exactly as before, with no need to issue KVM_ARM_VCPU_FINALIZE. As implemented in this patch, KVM_ARM_VCPU_FINALIZE is currently a placeholder: no finalizable features exist yet, so ioctl is not required and will always yield EINVAL. Subsequent patches will add the finalization logic to make use of this ioctl for SVE. No functional change for existing userspace. Signed-off-by: Dave Martin Reviewed-by: Julien Thierry Tested-by: zhang.lei Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c69e1370a5dc..9edbf0f676e7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -545,6 +545,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) if (likely(vcpu->arch.has_run_once)) return 0; + if (!kvm_arm_vcpu_is_finalized(vcpu)) + return -EPERM; + vcpu->arch.has_run_once = true; if (likely(irqchip_in_kernel(kvm))) { @@ -1116,6 +1119,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (unlikely(!kvm_vcpu_initialized(vcpu))) break; + r = -EPERM; + if (!kvm_arm_vcpu_is_finalized(vcpu)) + break; + r = -EFAULT; if (copy_from_user(&reg_list, user_list, sizeof(reg_list))) break; @@ -1169,6 +1176,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return kvm_arm_vcpu_set_events(vcpu, &events); } + case KVM_ARM_VCPU_FINALIZE: { + int what; + + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + + if (get_user(what, (const int __user *)argp)) + return -EFAULT; + + return kvm_arm_vcpu_finalize(vcpu, what); + } default: r = -EINVAL; } -- cgit From c110ae578ca0a10064dfbda3d786d6a733b9fe69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2019 17:24:03 +0100 Subject: kvm: move KVM_CAP_NR_MEMSLOTS to common code All architectures except MIPS were defining 
it in the same way, and memory slots are handled entirely by common code so there is no point in keeping the definition per-architecture. Signed-off-by: Paolo Bonzini --- virt/kvm/arm/arm.c | 3 --- virt/kvm/kvm_main.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 99c37384ba7b..be4ec5f3ba5f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -224,9 +224,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; - case KVM_CAP_NR_MEMSLOTS: - r = KVM_USER_MEM_SLOTS; - break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dc8edc97ba85..684b67252cd5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3063,6 +3063,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif case KVM_CAP_MAX_VCPU_ID: return KVM_MAX_VCPU_ID; + case KVM_CAP_NR_MEMSLOTS: + return KVM_USER_MEM_SLOTS; default: break; } -- cgit From a3be836df7cb777fa8ecbfd662224bfe0394f771 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 12 Apr 2019 15:30:58 +0100 Subject: KVM: arm/arm64: Demote kvm_arm_init_arch_resources() to just set up SVE The introduction of kvm_arm_init_arch_resources() looks like premature factoring, since nothing else uses this hook yet and it is not clear what will use it in the future. For now, let's not pretend that this is a general thing: This patch simply renames the function to kvm_arm_init_sve(), retaining the arm stub version under the new name. 
Suggested-by: Andrew Jones Signed-off-by: Dave Martin Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9edbf0f676e7..7039c99cc217 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1682,7 +1682,7 @@ int kvm_arch_init(void *opaque) if (err) return err; - err = kvm_arm_init_arch_resources(); + err = kvm_arm_init_sve(); if (err) return err; -- cgit From 384b40caa8afae44a54e8f69bd37097c0279fdce Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 23 Apr 2019 10:12:35 +0530 Subject: KVM: arm/arm64: Context-switch ptrauth registers When pointer authentication is supported, a guest may wish to use it. This patch adds the necessary KVM infrastructure for this to work, with a semi-lazy context switch of the pointer auth state. Pointer authentication feature is only enabled when VHE is built in the kernel and present in the CPU implementation so only VHE code paths are modified. When we schedule a vcpu, we disable guest usage of pointer authentication instructions and accesses to the keys. While these are disabled, we avoid context-switching the keys. When we trap the guest trying to use pointer authentication functionality, we change to eagerly context-switching the keys, and enable the feature. The next time the vcpu is scheduled out/in, we start again. However the host key save is optimized and implemented inside ptrauth instruction/register access trap. Pointer authentication consists of address authentication and generic authentication, and CPUs in a system might have varied support for either. Where support for either feature is not uniform, it is hidden from guests via ID register emulation, as a result of the cpufeature framework in the host. Unfortunately, address authentication and generic authentication cannot be trapped separately, as the architecture provides a single EL2 trap covering both. 
If we wish to expose one without the other, we cannot prevent a (badly-written) guest from intermittently using a feature which is not uniformly supported (when scheduled on a physical CPU which supports the relevant feature). Hence, this patch expects both types of authentication to be present in a cpu. This switch of key is done from guest enter/exit assembly as preparation for the upcoming in-kernel pointer authentication support. Hence, these key switching routines are not implemented in C code as they may cause pointer authentication key signing error in some situations. Signed-off-by: Mark Rutland [Only VHE, key switch in full assembly, vcpu_has_ptrauth checks, save host key in ptrauth exception trap] Signed-off-by: Amit Daniel Kachhap Reviewed-by: Julien Thierry Cc: Christoffer Dall Cc: kvmarm@lists.cs.columbia.edu [maz: various fixups] Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7039c99cc217..156c09da9e2b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -385,6 +385,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_clear_wfe_traps(vcpu); else vcpu_set_wfe_traps(vcpu); + + vcpu_ptrauth_setup_lazy(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- cgit From 630a16854d2d28d13e96ff27ab43cc5caa4609fc Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 20:22:11 +0100 Subject: arm64: KVM: Encapsulate kvm_cpu_context in kvm_host_data The virt/arm core allocates a kvm_cpu_context_t percpu, at present this is a typedef to kvm_cpu_context and is used to store host cpu context. The kvm_cpu_context structure is also used elsewhere to hold vcpu context. In order to use the percpu to hold additional future host information we encapsulate kvm_cpu_context in a new structure and rename the typedef and percpu to match. 
Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 156c09da9e2b..e960b91551d6 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -56,7 +56,7 @@ __asm__(".arch_extension virt"); #endif -DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); /* Per-CPU variable containing the currently running vcpu. */ @@ -360,8 +360,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { int *last_ran; + kvm_host_data_t *cpu_data; last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + cpu_data = this_cpu_ptr(&kvm_host_data); /* * We might get preempted before the vCPU actually runs, but @@ -373,7 +375,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } vcpu->cpu = cpu; - vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state); + vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); @@ -1569,11 +1571,11 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - kvm_cpu_context_t *cpu_ctxt; + kvm_host_data_t *cpu_data; - cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu); - kvm_init_host_cpu_context(cpu_ctxt, cpu); - err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); + cpu_data = per_cpu_ptr(&kvm_host_data, cpu); + kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu); + err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP); if (err) { kvm_err("Cannot map host CPU state: %d\n", err); -- cgit From 435e53fb5e21ad1820c5c69f208304c0e5623d01 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 20:22:15 +0100 Subject: arm64: KVM: Enable VHE support for :G/:H perf event modifiers With VHE different exception 
levels are used between the host (EL2) and guest (EL1) with a shared exception level for userspace (EL0). We can take advantage of this and use the PMU's exception level filtering to avoid enabling/disabling counters in the world-switch code. Instead we just modify the counter type to include or exclude EL0 at vcpu_{load,put} time. We also ensure that trapped PMU system register writes do not re-enable EL0 when reconfiguring the backing perf events. This approach completely avoids blackout windows seen with !VHE. Suggested-by: Christoffer Dall Signed-off-by: Andrew Murray Acked-by: Will Deacon Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index e960b91551d6..8b7ca101f0f7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -382,6 +382,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); kvm_arch_vcpu_load_fp(vcpu); + kvm_vcpu_pmu_restore_guest(vcpu); if (single_task_running()) vcpu_clear_wfe_traps(vcpu); @@ -397,6 +398,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); + kvm_vcpu_pmu_restore_host(vcpu); vcpu->cpu = -1; -- cgit From cdd6ad3ac63d2fa320baefcf92a02a918375c30f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 Mar 2019 05:30:01 -0500 Subject: KVM: polling: add architecture backend to disable polling There are cases where halt polling is unwanted. For example when running KVM on an over committed LPAR we rather want to give back the CPU to neighbour LPARs instead of polling. Let us provide a callback that allows architectures to disable polling. 
Signed-off-by: Christian Borntraeger Acked-by: Paolo Bonzini Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index ea434ddc8499..aad9284c043a 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -57,3 +57,6 @@ config HAVE_KVM_VCPU_ASYNC_IOCTL config HAVE_KVM_VCPU_RUN_PID_CHANGE bool + +config HAVE_KVM_NO_POLL + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 55fe8e20d8fd..23aec2f4ba71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2253,7 +2253,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) u64 block_ns; start = cur = ktime_get(); - if (vcpu->halt_poll_ns) { + if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); ++vcpu->stat.halt_attempted_poll; -- cgit From a1cd3f0883f435e5f9ae6530d7e62b361c87a91a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:36 +0200 Subject: KVM: Introduce a 'mmap' method for KVM devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some KVM devices will want to handle special mappings related to the underlying HW. For instance, the XIVE interrupt controller of the POWER9 processor has MMIO pages for thread interrupt management and for interrupt source control that need to be exposed to the guest when the OS has the required support. 
Cc: Paolo Bonzini Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- virt/kvm/kvm_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 55fe8e20d8fd..ea2018ae1cd7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2884,6 +2884,16 @@ out: } #endif +static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct kvm_device *dev = filp->private_data; + + if (dev->ops->mmap) + return dev->ops->mmap(dev, vma); + + return -ENODEV; +} + static int kvm_device_ioctl_attr(struct kvm_device *dev, int (*accessor)(struct kvm_device *dev, struct kvm_device_attr *attr), @@ -2936,6 +2946,7 @@ static const struct file_operations kvm_device_fops = { .unlocked_ioctl = kvm_device_ioctl, .release = kvm_device_release, KVM_COMPAT(kvm_device_ioctl), + .mmap = kvm_device_mmap, }; struct kvm_device *kvm_device_from_filp(struct file *filp) -- cgit From 2bde9b3ec8bdf60788e9e2ce8c07a2f8d6003dbd Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 18 Apr 2019 12:39:41 +0200 Subject: KVM: Introduce a 'release' method for KVM devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a P9 sPAPR VM boots, the CAS negotiation process determines which interrupt mode to use (XICS legacy or XIVE native) and invokes a machine reset to activate the chosen mode. To be able to switch from one interrupt mode to another, we introduce the capability to release a KVM device without destroying the VM. The KVM device interface is extended with a new 'release' method which is called when the file descriptor of the device is closed. Once 'release' is called, the 'destroy' method will not be called anymore as the device is removed from the device list of the VM. 
Cc: Paolo Bonzini Signed-off-by: Cédric Le Goater Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- virt/kvm/kvm_main.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ea2018ae1cd7..ea2619d5ca98 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2938,6 +2938,19 @@ static int kvm_device_release(struct inode *inode, struct file *filp) struct kvm_device *dev = filp->private_data; struct kvm *kvm = dev->kvm; + if (!dev) + return -ENODEV; + + if (dev->kvm != kvm) + return -EPERM; + + if (dev->ops->release) { + mutex_lock(&kvm->lock); + list_del(&dev->vm_node); + dev->ops->release(dev); + mutex_unlock(&kvm->lock); + } + kvm_put_kvm(kvm); return 0; } -- cgit From 65c4189de8c1d995f6bc2cc96b22206405466b53 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 17 Apr 2019 15:28:44 +0200 Subject: KVM: fix KVM_CLEAR_DIRTY_LOG for memory slots of unaligned size If a memory slot's size is not a multiple of 64 pages (256K), then the KVM_CLEAR_DIRTY_LOG API is unusable: clearing the final 64 pages either requires the requested page range to go beyond memslot->npages, or requires log->num_pages to be unaligned, and kvm_clear_dirty_log_protect requires log->num_pages to be both in range and aligned. To allow this case, allow log->num_pages not to be a multiple of 64 if it ends exactly on the last page of the slot. 
Reported-by: Peter Xu Fixes: 98938aa8edd6 ("KVM: validate userspace input in kvm_clear_dirty_log_protect()", 2019-01-02) Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 71ac0de892dc..e9ca417b9ae9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) return -EINVAL; - if ((log->first_page & 63) || (log->num_pages & 63)) + if (log->first_page & 63) return -EINVAL; slots = __kvm_memslots(kvm, as_id); @@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, n = kvm_dirty_bitmap_bytes(memslot); if (log->first_page > memslot->npages || - log->num_pages > memslot->npages - log->first_page) - return -EINVAL; + log->num_pages > memslot->npages - log->first_page || + (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63))) + return -EINVAL; *flush = false; dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); -- cgit From b8b002209c061273fd1ef7bb3c3c32301623a282 Mon Sep 17 00:00:00 2001 From: Jiang Biao Date: Tue, 23 Apr 2019 19:40:30 +0800 Subject: kvm_main: fix some comments is_dirty has been renamed to flush, but the comment for it is outdated. And the description about @flush parameter for kvm_clear_dirty_log_protect() is missing, add it in this patch as well. 
Signed-off-by: Jiang Biao Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e9ca417b9ae9..3194aa3d0b43 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1134,11 +1134,11 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT /** - * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages + * kvm_get_dirty_log_protect - get a snapshot of dirty pages * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address to which we copy the log - * @is_dirty: flag set if any page is dirty + * @flush: true if TLB flush is needed by caller * * We need to keep it in mind that VCPU threads can write to the bitmap * concurrently. So, to avoid losing track of dirty pages we keep the @@ -1223,6 +1223,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); * and reenable dirty page tracking for the corresponding pages. * @kvm: pointer to kvm instance * @log: slot id and address from which to fetch the bitmap of dirty pages + * @flush: true if TLB flush is needed by caller */ int kvm_clear_dirty_log_protect(struct kvm *kvm, struct kvm_clear_dirty_log *log, bool *flush) -- cgit From e45adf665a53df0db37f784ed87c6b57ddd81885 Mon Sep 17 00:00:00 2001 From: KarimAllah Ahmed Date: Thu, 31 Jan 2019 21:24:34 +0100 Subject: KVM: Introduce a new guest mapping API In KVM, especially for nested guests, there is a dominant pattern of: => map guest memory -> do_something -> unmap guest memory In addition to all this unnecessary noise in the code due to boiler plate code, most of the time the mapping function does not properly handle memory that is not backed by "struct page". This new guest mapping API encapsulates most of this boiler plate code and also handles guest memory that is not backed by "struct page". 
The current implementation of this API is using memremap for memory that is not backed by a "struct page" which would lead to a huge slow-down if it was used for high-frequency mapping operations. The API does not have any effect on current setups where guest memory is backed by a "struct page". Further patches are going to also introduce a pfn-cache which would significantly improve the performance of the memremap case. Signed-off-by: KarimAllah Ahmed Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3194aa3d0b43..53de2f946f9e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1742,6 +1742,70 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_page); +static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, + struct kvm_host_map *map) +{ + kvm_pfn_t pfn; + void *hva = NULL; + struct page *page = KVM_UNMAPPED_PAGE; + + if (!map) + return -EINVAL; + + pfn = gfn_to_pfn_memslot(slot, gfn); + if (is_error_noslot_pfn(pfn)) + return -EINVAL; + + if (pfn_valid(pfn)) { + page = pfn_to_page(pfn); + hva = kmap(page); + } else { + hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); + } + + if (!hva) + return -EFAULT; + + map->page = page; + map->hva = hva; + map->pfn = pfn; + map->gfn = gfn; + + return 0; +} + +int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) +{ + return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_map); + +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + bool dirty) +{ + if (!map) + return; + + if (!map->hva) + return; + + if (map->page) + kunmap(map->page); + else + memunmap(map->hva); + + if (dirty) { + kvm_vcpu_mark_page_dirty(vcpu, map->gfn); + kvm_release_pfn_dirty(map->pfn); + } else { + 
kvm_release_pfn_clean(map->pfn); + } + + map->hva = NULL; + map->page = NULL; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); + struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) { kvm_pfn_t pfn; -- cgit From 4ddc9204572c33f2eb91fbdb1d99d8078388b67d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:45 +0800 Subject: KVM: Fix the bitmap range to copy during clear dirty kvm_dirty_bitmap_bytes() will return the size of the dirty bitmap of the memslot rather than the size of bitmap passed over from the ioctl. Here for KVM_CLEAR_DIRTY_LOG we should only copy exactly the size of bitmap that covers kvm_clear_dirty_log.num_pages. Signed-off-by: Peter Xu Cc: stable@vger.kernel.org Fixes: 2a31b9db153530df4aa02dac8c32837bf5f47019 Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 53de2f946f9e..ad39c57de82d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1251,7 +1251,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, if (!dirty_bitmap) return -ENOENT; - n = kvm_dirty_bitmap_bytes(memslot); + n = ALIGN(log->num_pages, BITS_PER_LONG) / 8; if (log->first_page > memslot->npages || log->num_pages > memslot->npages - log->first_page || -- cgit From 53eac7a8f8cf3d7dc5ecac1946f31442f5eee5f3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:46 +0800 Subject: KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one Just imagine the case where num_pages < BITS_PER_LONG, then the loop will be skipped while it shouldn't. 
Signed-off-by: Peter Xu Fixes: 2a31b9db153530df4aa02dac8c32837bf5f47019 Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ad39c57de82d..7883e0ad07fe 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1264,8 +1264,8 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm, return -EFAULT; spin_lock(&kvm->mmu_lock); - for (offset = log->first_page, - i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--; + for (offset = log->first_page, i = offset / BITS_PER_LONG, + n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; i++, offset += BITS_PER_LONG) { unsigned long mask = *dirty_bitmap_buffer++; atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i]; -- cgit From d7547c55cbe7471255ca51f14bcd4699f5eaabe5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 May 2019 17:15:47 +0800 Subject: KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 The previous KVM_CAP_MANUAL_DIRTY_LOG_PROTECT has some problem which blocks the correct usage from userspace. Obsolete the old one and introduce a new capability bit for it. 
Suggested-by: Paolo Bonzini Signed-off-by: Peter Xu Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7883e0ad07fe..f4e02cd8fa43 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3110,7 +3110,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_CHECK_EXTENSION_VM: case KVM_CAP_ENABLE_CAP_VM: #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: #endif return 1; #ifdef CONFIG_KVM_MMIO @@ -3148,7 +3148,7 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, { switch (cap->cap) { #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT - case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT: + case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2: if (cap->flags || (cap->args[0] & ~1)) return -EINVAL; kvm->manual_dirty_log_protect = cap->args[0]; -- cgit From 4894fbcce856635c9ab79f44e50826e86bb92110 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 9 May 2019 14:33:44 +0200 Subject: KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to test for the device pointer validity when releasing a KVM device. The file descriptor should identify it safely. 
Fixes: 2bde9b3ec8bd ("KVM: Introduce a 'release' method for KVM devices") Signed-off-by: Cédric Le Goater Reviewed-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- virt/kvm/kvm_main.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ea2619d5ca98..37149433c07a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2938,12 +2938,6 @@ static int kvm_device_release(struct inode *inode, struct file *filp) struct kvm_device *dev = filp->private_data; struct kvm *kvm = dev->kvm; - if (!dev) - return -ENODEV; - - if (dev->kvm != kvm) - return -EPERM; - if (dev->ops->release) { mutex_lock(&kvm->lock); list_del(&dev->vm_node); -- cgit From c011d23ba046826ccf8c4a4a6c1d01c9ccaa1403 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 17 May 2019 14:08:53 +0200 Subject: kvm: fix compilation on aarch64 Commit e45adf665a53 ("KVM: Introduce a new guest mapping API", 2019-01-31) introduced a build failure on aarch64 defconfig: $ make -j$(nproc) ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- O=out defconfig \ Image.gz ... ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c: In function '__kvm_map_gfn': ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c:1763:9: error: implicit declaration of function 'memremap'; did you mean 'memset_p'? ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c:1763:46: error: 'MEMREMAP_WB' undeclared (first use in this function) ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c: In function 'kvm_vcpu_unmap': ../arch/arm64/kvm/../../../virt/kvm/kvm_main.c:1795:3: error: implicit declaration of function 'memunmap'; did you mean 'vm_munmap'? because these functions are declared in <linux/io.h> rather than <asm/io.h>, and the former was being pulled in already on x86 but not on aarch64. 
Reported-by: Nathan Chancellor Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d22b1f4bfa56..34afa94f0183 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,9 +51,9 @@ #include <linux/slab.h> #include <linux/sort.h> #include <linux/bsearch.h> +#include <linux/io.h> #include <asm/processor.h> -#include <asm/io.h> #include <asm/ioctl.h> #include <linux/uaccess.h> #include <asm/pgtable.h> -- cgit