From 3c313524605a6afd8207448a8e9967f5e8cba734 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 6 Feb 2017 13:24:41 +1100 Subject: KVM: PPC: Book3S HV: Allow userspace to set the desired SMT mode This allows userspace to set the desired virtual SMT (simultaneous multithreading) mode for a VM, that is, the number of VCPUs that get assigned to each virtual core. Previously, the virtual SMT mode was fixed to the number of threads per subcore, and if userspace wanted to have fewer vcpus per vcore, then it would achieve that by using a sparse CPU numbering. This had the disadvantage that the vcpu numbers can get quite large, particularly for SMT1 guests on a POWER8 with 8 threads per core. With this patch, userspace can set its desired virtual SMT mode and then use contiguous vcpu numbering. On POWER8, where the threading mode is "strict", the virtual SMT mode must be less than or equal to the number of threads per subcore. On POWER9, which implements a "loose" threading mode, the virtual SMT mode can be any power of 2 between 1 and 8, even though there is effectively one thread per subcore, since the threads are independent and can all be in different partitions. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7f71ab5fcad1..da90ae8cd508 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -554,7 +554,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: r = 0; - if (hv_enabled) { + if (kvm) + r = kvm->arch.smt_mode; + else if (hv_enabled) { if (cpu_has_feature(CPU_FTR_ARCH_300)) r = 1; else @@ -1712,6 +1714,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = 0; break; } + case KVM_CAP_PPC_SMT: { + unsigned long mode = cap->args[0]; + unsigned long flags = cap->args[1]; + + r = -EINVAL; + if (kvm->arch.kvm_ops->set_smt_mode) + r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags); + break; + } #endif default: r = -EINVAL; -- cgit From 579006944e0d675361e987c646b83d2d1725966c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 16 May 2017 16:41:20 +1000 Subject: KVM: PPC: Book3S HV: Virtualize doorbell facility on POWER9 On POWER9, we no longer have the restriction that we had on POWER8 where all threads in a core have to be in the same partition, so the CPU threads are now independent. However, we still want to be able to run guests with a virtual SMT topology, if only to allow migration of guests from POWER8 systems to POWER9. A guest that has a virtual SMT mode greater than 1 will expect to be able to use the doorbell facility; it will expect the msgsndp and msgclrp instructions to work appropriately and to be able to read sensible values from the TIR (thread identification register) and DPDES (directed privileged doorbell exception status) special-purpose registers. However, since each CPU thread is a separate sub-processor in POWER9, these instructions and registers can only be used within a single CPU thread. In order for these instructions to appear to act correctly according to the guest's virtual SMT mode, we have to trap and emulate them. We cause them to trap by clearing the HFSCR_MSGP bit in the HFSCR register. The emulation is triggered by the hypervisor facility unavailable interrupt that occurs when the guest uses them. To cause a doorbell interrupt to occur within the guest, we set the DPDES register to 1. If the guest has interrupts enabled, the CPU will generate a doorbell interrupt and clear the DPDES register in hardware. The DPDES hardware register for the guest is saved in the vcpu->arch.vcore->dpdes field. Since this gets written by the guest exit code, other VCPUs wishing to cause a doorbell interrupt don't write that field directly, but instead set a vcpu->arch.doorbell_request flag. This is consumed and set to 0 by the guest entry code, which then sets DPDES to 1. Emulating reads of the DPDES register is somewhat involved, because it requires reading the doorbell pending interrupt status of all of the VCPU threads in the virtual core, and if any of those VCPUs are running, their doorbell status is only up-to-date in the hardware DPDES registers of the CPUs where they are running. In order to get a reasonable approximation of the current doorbell status, we send those CPUs an IPI, causing an exit from the guest which will update the vcpu->arch.vcore->dpdes field. We then use that value in constructing the emulated DPDES register value. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index da90ae8cd508..8208c2b95a93 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -554,9 +554,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: r = 0; - if (kvm) - r = kvm->arch.smt_mode; - else if (hv_enabled) { + if (kvm) { + if (kvm->arch.emul_smt_mode > 1) + r = kvm->arch.emul_smt_mode; + else + r = kvm->arch.smt_mode; + } else if (hv_enabled) { if (cpu_has_feature(CPU_FTR_ARCH_300)) r = 1; else -- cgit From 134764ed6e12d9f99b3de68b8aaeae1ba842d91c Mon Sep 17 00:00:00 2001 From: Aravinda Prasad Date: Thu, 11 May 2017 16:32:48 +0530 Subject: KVM: PPC: Book3S HV: Add new capability to control MCE behaviour This introduces a new KVM capability to control how KVM behaves on machine check exception (MCE) in HV KVM guests. If this capability has not been enabled, KVM redirects machine check exceptions to guest's 0x200 vector, if the address in error belongs to the guest. With this capability enabled, KVM will cause a guest exit with the exit reason indicating an NMI. The new capability is required to avoid problems if a new kernel/KVM is used with an old QEMU, running a guest that doesn't issue "ibm,nmi-register". As old QEMU does not understand the NMI exit type, it treats it as a fatal error. However, the guest could have handled the machine check error if the exception was delivered to guest's 0x200 interrupt vector instead of NMI exit in case of old QEMU. [paulus@ozlabs.org - Reworded the commit message to be clearer, enable only on HV KVM.] Signed-off-by: Aravinda Prasad Reviewed-by: David Gibson Signed-off-by: Mahesh Salgaonkar Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8208c2b95a93..ccaa7a407c15 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -623,6 +623,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) /* Disable this on POWER9 until code handles new HPTE format */ r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300); break; +#endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_CAP_PPC_FWNMI: + r = hv_enabled; + break; #endif case KVM_CAP_PPC_HTM: r = cpu_has_feature(CPU_FTR_TM_COMP) && @@ -1543,6 +1548,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_CAP_PPC_FWNMI: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(vcpu->kvm)) + break; + r = 0; + vcpu->kvm->arch.fwnmi_enabled = true; + break; +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ default: r = -EINVAL; break; -- cgit From 2ed4f9dd19c0f76f7fb56c4b201696d29149325c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Jun 2017 16:01:27 +1000 Subject: KVM: PPC: Book3S HV: Add capability to report possible virtual SMT modes Now that userspace can set the virtual SMT mode by enabling the KVM_CAP_PPC_SMT capability, it is useful for userspace to be able to query the set of possible virtual SMT modes. This provides a new capability, KVM_CAP_PPC_SMT_POSSIBLE, to provide this information. The return value is a bitmap of possible modes, with bit N set if virtual SMT mode 2^N is available. That is, 1 indicates SMT1 is available, 2 indicates that SMT2 is available, 3 indicates that both SMT1 and SMT2 are available, and so on. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/kvm/powerpc.c') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ccaa7a407c15..b14736f3870b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -566,6 +566,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = threads_per_subcore; } break; + case KVM_CAP_PPC_SMT_POSSIBLE: + r = 1; + if (hv_enabled) { + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + r = ((threads_per_subcore << 1) - 1); + else + /* P9 can emulate dbells, so allow any mode */ + r = 8 | 4 | 2 | 1; + } + break; case KVM_CAP_PPC_RMA: r = 0; break; -- cgit