From 1a9167a214f560a23c5050ce6dfebae489528f0d Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 19 Jun 2019 13:01:27 -0300 Subject: KVM: PPC: Report single stepping capability When calling the KVM_SET_GUEST_DEBUG ioctl, userspace might request the next instruction to be single stepped via the KVM_GUESTDBG_SINGLESTEP control bit of the kvm_guest_debug structure. This patch adds the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability in order to inform userspace about the state of single stepping support. We currently don't have support for guest single stepping implemented in Book3S HV so the capability is only present for Book3S PR and BookE. Signed-off-by: Fabiano Rosas Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3a77bb643452..9e085e931d74 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -522,6 +522,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; + case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: + /* fall through */ case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -- cgit From 258ed7d02843052d127df2264c8b342276ced18a Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 23 Sep 2019 18:30:23 -0300 Subject: KVM: PPC: Reduce calls to get current->mm by storing the value locally Reduces the number of calls to get_current() in order to get the value of current->mm by doing it once and storing the value, since it is not supposed to change inside the same process). Signed-off-by: Leonardo Bras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 9a75f0e1933b..f2b9aea43216 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -508,6 +508,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct vm_area_struct *vma; unsigned long rcbits; long mmio_update; + struct mm_struct *mm; if (kvm_is_radix(kvm)) return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); @@ -584,6 +585,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = false; pfn = 0; page = NULL; + mm = current->mm; pte_size = PAGE_SIZE; writing = (dsisr & DSISR_ISSTORE) != 0; /* If writing != 0, then the HPTE must allow writing, if we get here */ @@ -592,8 +594,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, hva); + down_read(&mm->mmap_sem); + vma = find_vma(mm, hva); if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && (vma->vm_flags & VM_PFNMAP)) { pfn = vma->vm_pgoff + @@ -602,7 +604,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); write_ok = vma->vm_flags & VM_WRITE; } - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); if (!pfn) goto out_put; } else { @@ -621,8 +623,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_current_mm_pte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(mm->pgd, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) -- cgit From f41c4989c8de1fa70aafe950abaf80c56a8b8712 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 23 Sep 2019 18:24:08 -0300 Subject: KVM: PPC: E500: Replace current->mm by kvm->mm Given that in kvm_create_vm() there is: kvm->mm = current->mm; And that on every kvm_*_ioctl we have: if (kvm->mm != current->mm) return -EIO; I see no reason to keep using current->mm instead of kvm->mm. By doing so, we would reduce the use of 'global' variables on code, relying more in the contents of kvm struct. Signed-off-by: Leonardo Bras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/e500_mmu_host.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 321db0fdb9db..425d13806645 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -355,9 +355,9 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 1) { struct vm_area_struct *vma; - down_read(¤t->mm->mmap_sem); + down_read(&kvm->mm->mmap_sem); - vma = find_vma(current->mm, hva); + vma = find_vma(kvm->mm, hva); if (vma && hva >= vma->vm_start && (vma->vm_flags & VM_PFNMAP)) { /* @@ -441,7 +441,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); } - up_read(¤t->mm->mmap_sem); + up_read(&kvm->mm->mmap_sem); } if (likely(!pfnmap)) { -- cgit From e7d71c943040c23f2fd042033d319f56e84f845b Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:38 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Set kvm->arch.xive when VPs are allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we cannot allocate the XIVE VPs in OPAL, the creation of a XIVE or XICS-on-XIVE device is aborted as expected, but we leave kvm->arch.xive set forever since the release method isn't called in this case. Any subsequent tentative to create a XIVE or XICS-on-XIVE for this VM will thus always fail (DoS). This is a problem for QEMU since it destroys and re-creates these devices when the VM is reset: the VM would be restricted to using the much slower emulated XIVE or XICS forever. As an alternative to adding rollback, do not assign kvm->arch.xive before making sure the XIVE VPs are allocated in OPAL. Cc: stable@vger.kernel.org # v5.2 Fixes: 5422e95103cf ("KVM: PPC: Book3S HV: XIVE: Replace the 'destroy' method by a 'release' method") Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 11 +++++------ arch/powerpc/kvm/book3s_xive_native.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a3f9c665bb5b..baa740815b3c 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2005,6 +2005,10 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) pr_devel("Creating xive for partition\n"); + /* Already there ? */ + if (kvm->arch.xive) + return -EEXIST; + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; @@ -2014,12 +2018,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->kvm = kvm; mutex_init(&xive->lock); - /* Already there ? */ - if (kvm->arch.xive) - ret = -EEXIST; - else - kvm->arch.xive = xive; - /* We use the default queue size set by the host */ xive->q_order = xive_native_default_eq_shift(); if (xive->q_order < PAGE_SHIFT) @@ -2039,6 +2037,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 78b906ffa0d2..ebb4215baf45 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1081,7 +1081,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) dev->private = xive; xive->dev = dev; xive->kvm = kvm; - kvm->arch.xive = xive; mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); @@ -1102,6 +1101,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } -- cgit From 8a4e7597ba1e41030189b73cd7261f4383588d1d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:49 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Show VP id in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print out the VP id of each connected vCPU, this allow to see: - the VP block base in which OPAL encodes information that may be useful when debugging - the packed vCPU id which may differ from the raw vCPU id if the latter is >= KVM_MAX_VCPUS (2048) Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 4 ++-- arch/powerpc/kvm/book3s_xive_native.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index baa740815b3c..0b7859e40f66 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2107,9 +2107,9 @@ static int xive_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x" + seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x" " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n", - xc->server_num, xc->cppr, xc->hw_cppr, + xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr, xc->mfrr, xc->pending, xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index ebb4215baf45..43a86858390a 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1204,8 +1204,8 @@ static int xive_native_debug_show(struct seq_file *m, void *private) if (!xc) continue; - seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", - xc->server_num, + seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", + xc->server_num, xc->vp_id, vcpu->arch.xive_saved_state.nsr, vcpu->arch.xive_saved_state.cppr, vcpu->arch.xive_saved_state.ipb, -- cgit From 8db29ea2391cc6f5b73cc9c04b2dee4409b9fc05 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:53:55 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Compute the VP id in a common helper Reduce code duplication by consolidating the checking of vCPU ids and VP ids to a common helper used by both legacy and native XIVE KVM devices. And explain the magic with a comment. Signed-off-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 42 +++++++++++++++++++++++++++-------- arch/powerpc/kvm/book3s_xive.h | 1 + arch/powerpc/kvm/book3s_xive_native.c | 11 ++------- 3 files changed, 36 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 0b7859e40f66..d84da9f6ee88 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1211,6 +1211,37 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.xive_vcpu = NULL; } +static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) +{ + /* We have a block of KVM_MAX_VCPUS VPs. We just need to check + * raw vCPU ids are below the expected limit for this guest's + * core stride ; kvmppc_pack_vcpu_id() will pack them down to an + * index that can be safely used to compute a VP id that belongs + * to the VP block. + */ + return cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode; +} + +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) +{ + u32 vp_id; + + if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) { + pr_devel("Out of bounds !\n"); + return -EINVAL; + } + + vp_id = kvmppc_xive_vp(xive, cpu); + if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { + pr_devel("Duplicate !\n"); + return -EEXIST; + } + + *vp = vp_id; + + return 0; +} + int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu) { @@ -1229,20 +1260,13 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } /* We need to synchronize with queue provisioning */ mutex_lock(&xive->lock); - vp_id = kvmppc_xive_vp(xive, cpu); - if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { - pr_devel("Duplicate !\n"); - r = -EEXIST; + r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id); + if (r) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index fe3ed50e0818..90cf6ec35a68 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -296,6 +296,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); +int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 43a86858390a..5bb480b2aafd 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -118,19 +118,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, return -EPERM; if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; - if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { - pr_devel("Out of bounds !\n"); - return -EINVAL; - } mutex_lock(&xive->lock); - vp_id = kvmppc_xive_vp(xive, server_num); - if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { - pr_devel("Duplicate !\n"); - rc = -EEXIST; + rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id); + if (rc) goto bail; - } xc = kzalloc(sizeof(*xc), GFP_KERNEL); if (!xc) { -- cgit From 062cfab7069fcb55d77ad5552f29e24178728fa2 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:54:01 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Make VP block size configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XIVE VP is an internal structure which allow the XIVE interrupt controller to maintain the interrupt context state of vCPUs non dispatched on HW threads. When a guest is started, the XIVE KVM device allocates a block of XIVE VPs in OPAL, enough to accommodate the highest possible vCPU id KVM_MAX_VCPU_ID (16384) packed down to KVM_MAX_VCPUS (2048). With a guest's core stride of 8 and a threading mode of 1 (QEMU's default), a VM must run at least 256 vCPUs to actually need such a range of VPs. A POWER9 system has a limited XIVE VP space : 512k and KVM is currently wasting this HW resource with large VP allocations, especially since a typical VM likely runs with a lot less vCPUs. Make the size of the VP block configurable. Add an nr_servers field to the XIVE structure and a function to set it for this purpose. Split VP allocation out of the device create function. Since the VP block isn't used before the first vCPU connects to the XIVE KVM device, allocation is now performed by kvmppc_xive_connect_vcpu(). This gives the opportunity to set nr_servers in between: kvmppc_xive_create() / kvmppc_xive_native_create() . . kvmppc_xive_set_nr_servers() . . kvmppc_xive_connect_vcpu() / kvmppc_xive_native_connect_vcpu() The connect_vcpu() functions check that the vCPU id is below nr_servers and if it is the first vCPU they allocate the VP block. This is protected against a concurrent update of nr_servers by kvmppc_xive_set_nr_servers() with the xive->lock mutex. Also, the block is allocated once for the device lifetime: nr_servers should stay constant otherwise connect_vcpu() could generate a boggus VP id and likely crash OPAL. It is thus forbidden to update nr_servers once the block is allocated. If the VP allocation fail, return ENOSPC which seems more appropriate to report the depletion of system wide HW resource than ENOMEM or ENXIO. A VM using a stride of 8 and 1 thread per core with 32 vCPUs would hence only need 256 VPs instead of 2048. If the stride is set to match the number of threads per core, this goes further down to 32. This will be exposed to userspace by a subsequent patch. Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 65 ++++++++++++++++++++++++++++------- arch/powerpc/kvm/book3s_xive.h | 4 +++ arch/powerpc/kvm/book3s_xive_native.c | 18 +++------- 3 files changed, 62 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index d84da9f6ee88..6c35b3d95986 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1213,13 +1213,13 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) { - /* We have a block of KVM_MAX_VCPUS VPs. We just need to check + /* We have a block of xive->nr_servers VPs. We just need to check * raw vCPU ids are below the expected limit for this guest's * core stride ; kvmppc_pack_vcpu_id() will pack them down to an * index that can be safely used to compute a VP id that belongs * to the VP block. */ - return cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode; + return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; } int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) @@ -1231,6 +1231,14 @@ int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) return -EINVAL; } + if (xive->vp_base == XIVE_INVALID_VP) { + xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers); + pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers); + + if (xive->vp_base == XIVE_INVALID_VP) + return -ENOSPC; + } + vp_id = kvmppc_xive_vp(xive, cpu); if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { pr_devel("Duplicate !\n"); @@ -1858,6 +1866,43 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, return 0; } +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr) +{ + u32 __user *ubufp = (u32 __user *) addr; + u32 nr_servers; + int rc = 0; + + if (get_user(nr_servers, ubufp)) + return -EFAULT; + + pr_devel("%s nr_servers=%u\n", __func__, nr_servers); + + if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID) + return -EINVAL; + + mutex_lock(&xive->lock); + if (xive->vp_base != XIVE_INVALID_VP) + /* The VP block is allocated once and freed when the device + * is released. Better not allow to change its size since its + * used by connect_vcpu to validate vCPU ids are valid (eg, + * setting it back to a higher value could allow connect_vcpu + * to come up with a VP id that goes beyond the VP block, which + * is likely to cause a crash in OPAL). + */ + rc = -EBUSY; + else if (nr_servers > KVM_MAX_VCPUS) + /* We don't need more servers. Higher vCPU ids get packed + * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id(). + */ + xive->nr_servers = KVM_MAX_VCPUS; + else + xive->nr_servers = nr_servers; + + mutex_unlock(&xive->lock); + + return rc; +} + static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { struct kvmppc_xive *xive = dev->private; @@ -2025,7 +2070,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive for partition\n"); @@ -2049,18 +2093,15 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) else xive->q_page_order = xive->q_order - PAGE_SHIFT; - /* Allocate a bunch of VPs */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENOMEM; + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. + */ + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); - if (ret) - return ret; - kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 90cf6ec35a68..382e3a56e789 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -135,6 +135,9 @@ struct kvmppc_xive { /* Flags */ u8 single_escalation; + /* Number of entries in the VP block */ + u32 nr_servers; + struct kvmppc_xive_ops *ops; struct address_space *mapping; struct mutex mapping_lock; @@ -297,6 +300,7 @@ struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); +int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5bb480b2aafd..8ab333eabeef 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1060,7 +1060,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) { struct kvmppc_xive *xive; struct kvm *kvm = dev->kvm; - int ret = 0; pr_devel("Creating xive native device\n"); @@ -1077,23 +1076,16 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); - /* - * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for - * a default. Getting the max number of CPUs the VM was - * configured with would improve our usage of the XIVE VP space. + /* VP allocation is delayed to the first call to connect_vcpu */ + xive->vp_base = XIVE_INVALID_VP; + /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets + * on a POWER9 system. */ - xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); - pr_devel("VP_Base=%x\n", xive->vp_base); - - if (xive->vp_base == XIVE_INVALID_VP) - ret = -ENXIO; + xive->nr_servers = KVM_MAX_VCPUS; xive->single_escalation = xive_native_has_single_escalation(); xive->ops = &kvmppc_xive_native_ops; - if (ret) - return ret; - kvm->arch.xive = xive; return 0; } -- cgit From efe5ddcae496b7c7307805d31815df23ba69bf7c Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 27 Sep 2019 13:54:07 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Allow userspace to set the # of VPs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new attribute to both XIVE and XICS-on-XIVE KVM devices so that userspace can tell how many interrupt servers it needs. If a VM needs less than the current default of KVM_MAX_VCPUS (2048), we can allocate less VPs in OPAL. Combined with a core stride (VSMT) that matches the number of guest threads per core, this may substantially increases the number of VMs that can run concurrently with an in-kernel XIVE device. Since the legacy XIVE KVM device is exposed to userspace through the XICS KVM API, a new attribute group is added to it for this purpose. While here, fix the syntax of the existing KVM_DEV_XICS_GRP_SOURCES in the XICS documentation. Signed-off-by: Greg Kurz Reviewed-by: Cédric Le Goater Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive.c | 10 ++++++++++ arch/powerpc/kvm/book3s_xive_native.c | 3 +++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6c35b3d95986..66858b7d3c6b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1911,6 +1911,11 @@ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_XICS_GRP_SOURCES: return xive_set_source(xive, attr->attr, attr->addr); + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); + } } return -ENXIO; } @@ -1936,6 +1941,11 @@ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) attr->attr < KVMPPC_XICS_NR_IRQS) return 0; break; + case KVM_DEV_XICS_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_XICS_NR_SERVERS: + return 0; + } } return -ENXIO; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 8ab333eabeef..34bd123fa024 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -921,6 +921,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, return kvmppc_xive_reset(xive); case KVM_DEV_XIVE_EQ_SYNC: return kvmppc_xive_native_eq_sync(xive); + case KVM_DEV_XIVE_NR_SERVERS: + return kvmppc_xive_set_nr_servers(xive, attr->addr); } break; case KVM_DEV_XIVE_GRP_SOURCE: @@ -960,6 +962,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_XIVE_RESET: case KVM_DEV_XIVE_EQ_SYNC: + case KVM_DEV_XIVE_NR_SERVERS: return 0; } break; -- cgit From 9ee6471eb9d43114ba4f0de3e0f483bf6fb2a906 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:21 +1000 Subject: KVM: PPC: Book3S: Define and use SRR1_MSR_BITS Acked-by: Paul Mackerras Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.c | 2 +- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ec2547cc5ecb..a2336c452905 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -136,7 +136,7 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & ~0x783f0000ul) | flags); + kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & SRR1_MSR_BITS) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); vcpu->arch.mmu.reset_msr(vcpu); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index cdf30c6eaf54..dc97e5be76f6 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1186,7 +1186,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, forward_to_l1: vcpu->arch.fault_dsisr = flags; if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { - vcpu->arch.shregs.msr &= ~0x783f0000ul; + vcpu->arch.shregs.msr &= SRR1_MSR_BITS; vcpu->arch.shregs.msr |= flags; } return RESUME_HOST; -- cgit From 87a45e07a5abfec4d6b0e8356718f8919d0a3c20 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:22 +1000 Subject: KVM: PPC: Book3S: Replace reset_msr mmu op with inject_interrupt arch op reset_msr sets the MSR for interrupt injection, but it's cleaner and more flexible to provide a single op to set both MSR and PC for the interrupt. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.c | 27 +------------------------ arch/powerpc/kvm/book3s_32_mmu.c | 6 ------ arch/powerpc/kvm/book3s_64_mmu.c | 15 -------------- arch/powerpc/kvm/book3s_64_mmu_hv.c | 13 ------------ arch/powerpc/kvm/book3s_hv.c | 22 ++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 40 ++++++++++++++++++++++++++++++++++++- 6 files changed, 62 insertions(+), 61 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a2336c452905..58a59ee998e2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -74,27 +74,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { - ulong pc = kvmppc_get_pc(vcpu); - ulong lr = kvmppc_get_lr(vcpu); - if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); - if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) - kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); - vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; - } -} -EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); - -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) -{ - if (!is_kvmppc_hv_enabled(vcpu->kvm)) - return to_book3s(vcpu)->hior; - return 0; -} - static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { @@ -134,11 +113,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - kvmppc_unfixup_split_real(vcpu); - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, (kvmppc_get_msr(vcpu) & SRR1_MSR_BITS) | flags); - kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); - vcpu->arch.mmu.reset_msr(vcpu); + vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags); } static int kvmppc_book3s_vec2irqprio(unsigned int vec) diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 18f244aad7aa..f21e73492ce3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -90,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); } -static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) -{ - kvmppc_set_msr(vcpu, 0); -} - static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu, u32 sre, gva_t eaddr, bool primary) @@ -406,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin; mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin; mmu->xlate = kvmppc_mmu_book3s_32_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_32_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 5f63a5f7f24f..599133256a95 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -24,20 +24,6 @@ #define dprintk(X...) do { } while(0) #endif -static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - unsigned long cur_msr = kvmppc_get_msr(vcpu); - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(cur_msr)) - msr |= MSR_TS_S; - else - msr |= cur_msr & MSR_TS_MASK; - - kvmppc_set_msr(vcpu, msr); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( struct kvm_vcpu *vcpu, gva_t eaddr) @@ -676,7 +662,6 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) mmu->slbie = kvmppc_mmu_book3s_64_slbie; mmu->slbia = kvmppc_mmu_book3s_64_slbia; mmu->xlate = kvmppc_mmu_book3s_64_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr; mmu->tlbie = kvmppc_mmu_book3s_64_tlbie; mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid; mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f2b9aea43216..4c37e97c75a1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -275,18 +275,6 @@ int kvmppc_mmu_hv_init(void) return 0; } -static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) -{ - unsigned long msr = vcpu->arch.intr_msr; - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) - msr |= MSR_TS_S; - else - msr |= vcpu->arch.shregs.msr & MSR_TS_MASK; - kvmppc_set_msr(vcpu, msr); -} - static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) @@ -2162,7 +2150,6 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; - mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 709cf1fd4cf4..94a0a9911b27 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -338,6 +338,27 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } +static void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = vec; + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + kvmppc_set_msr(vcpu, new_msr); +} + static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { /* @@ -5401,6 +5422,7 @@ static struct kvmppc_ops kvm_ops_hv = { .set_one_reg = kvmppc_set_one_reg_hv, .vcpu_load = kvmppc_core_vcpu_load_hv, .vcpu_put = kvmppc_core_vcpu_put_hv, + .inject_interrupt = kvmppc_inject_interrupt_hv, .set_msr = kvmppc_set_msr_hv, .vcpu_run = kvmppc_vcpu_run_hv, .vcpu_create = kvmppc_core_vcpu_create_hv, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7..ce4fcf76e53e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,7 +90,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); } -void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); +static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + ulong lr = kvmppc_get_lr(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} + +static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + kvmppc_unfixup_split_real(vcpu); + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = to_book3s(vcpu)->hior + vec; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; +#endif + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + kvmppc_set_msr(vcpu, new_msr); +} static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { @@ -1761,6 +1797,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, #else /* default to book3s_32 (750) */ vcpu->arch.pvr = 0x84202; + vcpu->arch.intr_msr = 0; #endif kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); vcpu->arch.slb_nr = 64; @@ -2058,6 +2095,7 @@ static struct kvmppc_ops kvm_ops_pr = { .set_one_reg = kvmppc_set_one_reg_pr, .vcpu_load = kvmppc_core_vcpu_load_pr, .vcpu_put = kvmppc_core_vcpu_put_pr, + .inject_interrupt = kvmppc_inject_interrupt_pr, .set_msr = kvmppc_set_msr_pr, .vcpu_run = kvmppc_vcpu_run_pr, .vcpu_create = kvmppc_core_vcpu_create_pr, -- cgit From 268f4ef9954cec198cd6772caadf453bcaed3e5a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:23 +1000 Subject: KVM: PPC: Book3S HV: Reuse kvmppc_inject_interrupt for async guest delivery This consolidates the HV interrupt delivery logic into one place. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s.h | 3 ++ arch/powerpc/kvm/book3s_hv.c | 43 ----------------------- arch/powerpc/kvm/book3s_hv_builtin.c | 67 ++++++++++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 57 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 2ef1311a2a13..3a4613985949 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -32,4 +32,7 @@ extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val); static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {} #endif +extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr); +extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags); + #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 94a0a9911b27..c340d416dce3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -133,7 +133,6 @@ static inline bool nesting_enabled(struct kvm *kvm) /* If set, the threads on each CPU core have to be in the same MMU mode */ static bool no_mixing_hpt_and_radix; -static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -338,39 +337,6 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } -static void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) -{ - unsigned long msr, pc, new_msr, new_pc; - - msr = kvmppc_get_msr(vcpu); - pc = kvmppc_get_pc(vcpu); - new_msr = vcpu->arch.intr_msr; - new_pc = vec; - - /* If transactional, change to suspend mode on IRQ delivery */ - if (MSR_TM_TRANSACTIONAL(msr)) - new_msr |= MSR_TS_S; - else - new_msr |= msr & MSR_TS_MASK; - - kvmppc_set_srr0(vcpu, pc); - kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); - kvmppc_set_pc(vcpu, new_pc); - kvmppc_set_msr(vcpu, new_msr); -} - -static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) -{ - /* - * Check for illegal transactional state bit combination - * and if we find it, force the TS field to a safe state. - */ - if ((msr & MSR_TS_MASK) == MSR_TS_MASK) - msr &= ~MSR_TS_MASK; - vcpu->arch.shregs.msr = msr; - kvmppc_end_cede(vcpu); -} - static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.pvr = pvr; @@ -2475,15 +2441,6 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) vcpu->arch.timer_running = 1; } -static void kvmppc_end_cede(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ceded = 0; - if (vcpu->arch.timer_running) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - vcpu->arch.timer_running = 0; - } -} - extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7c1909657b55..068bee941a71 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -755,6 +755,56 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) local_paca->kvm_hstate.kvm_split_mode = NULL; } +static void kvmppc_end_cede(struct kvm_vcpu *vcpu) +{ + vcpu->arch.ceded = 0; + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } +} + +void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) +{ + /* + * Check for illegal transactional state bit combination + * and if we find it, force the TS field to a safe state. + */ + if ((msr & MSR_TS_MASK) == MSR_TS_MASK) + msr &= ~MSR_TS_MASK; + vcpu->arch.shregs.msr = msr; + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv); + +static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + unsigned long msr, pc, new_msr, new_pc; + + msr = kvmppc_get_msr(vcpu); + pc = kvmppc_get_pc(vcpu); + new_msr = vcpu->arch.intr_msr; + new_pc = vec; + + /* If transactional, change to suspend mode on IRQ delivery */ + if (MSR_TM_TRANSACTIONAL(msr)) + new_msr |= MSR_TS_S; + else + new_msr |= msr & MSR_TS_MASK; + + kvmppc_set_srr0(vcpu, pc); + kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); + kvmppc_set_pc(vcpu, new_pc); + vcpu->arch.shregs.msr = new_msr; +} + +void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) +{ + inject_interrupt(vcpu, vec, srr1_flags); + kvmppc_end_cede(vcpu); +} +EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv); + /* * Is there a PRIV_DOORBELL pending for the guest (on POWER9)? * Can we inject a Decrementer or a External interrupt? @@ -762,7 +812,6 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) { int ext; - unsigned long vec = 0; unsigned long lpcr; /* Insert EXTERNAL bit into LPCR at the MER bit position */ @@ -774,26 +823,16 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & MSR_EE) { if (ext) { - vec = BOOK3S_INTERRUPT_EXTERNAL; + inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0); } else { long int dec = mfspr(SPRN_DEC); if (!(lpcr & LPCR_LD)) dec = (int) dec; if (dec < 0) - vec = BOOK3S_INTERRUPT_DECREMENTER; + inject_interrupt(vcpu, + BOOK3S_INTERRUPT_DECREMENTER, 0); } } - if (vec) { - unsigned long msr, old_msr = vcpu->arch.shregs.msr; - - kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); - kvmppc_set_srr1(vcpu, old_msr); - kvmppc_set_pc(vcpu, vec); - msr = vcpu->arch.intr_msr; - if (MSR_TM_ACTIVE(old_msr)) - msr |= MSR_TS_S; - vcpu->arch.shregs.msr = msr; - } if (vcpu->arch.doorbell_request) { mtspr(SPRN_DPDES, 1); -- cgit From 6a13cb0c376abb436d060b989018257963656d0c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:24 +1000 Subject: KVM: PPC: Book3S HV: Implement LPCR[AIL]=3 mode for injected interrupts kvmppc_inject_interrupt does not implement LPCR[AIL]!=0 modes, which can result in the guest receiving interrupts as if LPCR[AIL]=0 contrary to the ISA. In practice, Linux guests cope with this deviation, but it should be fixed. Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_builtin.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 068bee941a71..7cd3cf3d366b 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -792,6 +792,21 @@ static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags) else new_msr |= msr & MSR_TS_MASK; + /* + * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and + * applicable. AIL=2 is not supported. + * + * AIL does not apply to SRESET, MCE, or HMI (which is never + * delivered to the guest), and does not apply if IR=0 or DR=0. + */ + if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET && + vec != BOOK3S_INTERRUPT_MACHINE_CHECK && + (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 && + (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) { + new_msr |= MSR_IR | MSR_DR; + new_pc += 0xC000000000004000ULL; + } + kvmppc_set_srr0(vcpu, pc); kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags); kvmppc_set_pc(vcpu, new_pc); -- cgit From 55d7004299eb917767761f01a208d50afad4f535 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Oct 2019 16:00:25 +1000 Subject: KVM: PPC: Book3S HV: Reject mflags=2 (LPCR[AIL]=2) ADDR_TRANS_MODE mode AIL=2 mode has no known users, so is not well tested or supported. Disallow guests from selecting this mode because it may become deprecated in future versions of the architecture. This policy decision is not left to QEMU because KVM support is required for AIL=2 (when injecting interrupts). Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c340d416dce3..ec5c0379296a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -779,6 +779,11 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawr = value1; vcpu->arch.dawrx = value2; return H_SUCCESS; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + /* KVM does not support mflags=2 (AIL=2) */ + if (mflags != 0 && mflags != 3) + return H_UNSUPPORTED_FLAG_START; + return H_TOO_HARD; default: return H_TOO_HARD; } -- cgit