summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c708
1 files changed, 533 insertions, 175 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 130bafdb1430..7667563fb9ff 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -391,13 +391,34 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
/* Dummy value used in computing PCR value below */
#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
+static inline unsigned long map_pcr_to_cap(unsigned long pcr)
+{
+ unsigned long cap = 0;
+
+ switch (pcr) {
+ case PCR_ARCH_300:
+ cap = H_GUEST_CAP_POWER9;
+ break;
+ case PCR_ARCH_31:
+ if (cpu_has_feature(CPU_FTR_P11_PVR))
+ cap = H_GUEST_CAP_POWER11;
+ else
+ cap = H_GUEST_CAP_POWER10;
+ break;
+ default:
+ break;
+ }
+
+ return cap;
+}
+
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
- unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
+ unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
/* We can (emulate) our own architecture version and anything older */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
+ if (cpu_has_feature(CPU_FTR_P11_PVR) || cpu_has_feature(CPU_FTR_ARCH_31))
host_pcr_bit = PCR_ARCH_31;
else if (cpu_has_feature(CPU_FTR_ARCH_300))
host_pcr_bit = PCR_ARCH_300;
@@ -426,6 +447,7 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
guest_pcr_bit = PCR_ARCH_300;
break;
case PVR_ARCH_31:
+ case PVR_ARCH_31_P11:
guest_pcr_bit = PCR_ARCH_31;
break;
default:
@@ -437,8 +459,20 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
if (guest_pcr_bit > host_pcr_bit)
return -EINVAL;
+ if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) {
+ /*
+ * 'arch_compat == 0' would mean the guest should default to
+ * L1's compatibility. In this case, the guest would pick
+ * host's PCR and evaluate the corresponding capabilities.
+ */
+ cap = map_pcr_to_cap(guest_pcr_bit);
+ if (!(cap & nested_capabilities))
+ return -EINVAL;
+ }
+
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LOGICAL_PVR);
/*
* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
* Also set all reserved PCR bits
@@ -642,7 +676,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
return err;
}
-static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap,
+ struct kvmppc_vpa *old_vpap)
{
struct kvm *kvm = vcpu->kvm;
void *va;
@@ -682,9 +717,8 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
kvmppc_unpin_guest_page(kvm, va, gpa, false);
va = NULL;
}
- if (vpap->pinned_addr)
- kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
- vpap->dirty);
+ *old_vpap = *vpap;
+
vpap->gpa = gpa;
vpap->pinned_addr = va;
vpap->dirty = false;
@@ -694,6 +728,9 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_vpa old_vpa = { 0 };
+
if (!(vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending))
@@ -701,17 +738,34 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.vpa.update_pending) {
- kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
- if (vcpu->arch.vpa.pinned_addr)
+ kvmppc_update_vpa(vcpu, &vcpu->arch.vpa, &old_vpa);
+ if (old_vpa.pinned_addr) {
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_set_vpa(vcpu, ~0ull);
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
+ }
+ if (vcpu->arch.vpa.pinned_addr) {
init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_set_vpa(vcpu, __pa(vcpu->arch.vpa.pinned_addr));
+ }
}
if (vcpu->arch.dtl.update_pending) {
- kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
+ kvmppc_update_vpa(vcpu, &vcpu->arch.dtl, &old_vpa);
+ if (old_vpa.pinned_addr)
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
vcpu->arch.dtl_index = 0;
}
- if (vcpu->arch.slb_shadow.update_pending)
- kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
+ if (vcpu->arch.slb_shadow.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow, &old_vpa);
+ if (old_vpa.pinned_addr)
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
+ }
+
spin_unlock(&vcpu->arch.vpa_update_lock);
}
@@ -794,7 +848,7 @@ static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen);
- __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen);
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + kvmppc_get_tb_offset(vcpu), stolen);
vcpu->arch.vpa.dirty = true;
}
@@ -845,9 +899,9 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
+ if (kvmppc_get_arch_compat(vcpu) >= PVR_ARCH_207)
return true;
- if ((!vcpu->arch.vcore->arch_compat) &&
+ if ((!kvmppc_get_arch_compat(vcpu)) &&
cpu_has_feature(CPU_FTR_ARCH_207S))
return true;
return false;
@@ -868,7 +922,7 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
/* Guests can't breakpoint the hypervisor */
if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
return H_P3;
- vcpu->arch.ciabr = value1;
+ kvmppc_set_ciabr_hv(vcpu, value1);
return H_SUCCESS;
case H_SET_MODE_RESOURCE_SET_DAWR0:
if (!kvmppc_power8_compatible(vcpu))
@@ -879,8 +933,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
return H_P4;
- vcpu->arch.dawr0 = value1;
- vcpu->arch.dawrx0 = value2;
+ kvmppc_set_dawr0_hv(vcpu, value1);
+ kvmppc_set_dawrx0_hv(vcpu, value2);
return H_SUCCESS;
case H_SET_MODE_RESOURCE_SET_DAWR1:
if (!kvmppc_power8_compatible(vcpu))
@@ -895,8 +949,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
return H_P4;
- vcpu->arch.dawr1 = value1;
- vcpu->arch.dawrx1 = value2;
+ kvmppc_set_dawr1_hv(vcpu, value1);
+ kvmppc_set_dawrx1_hv(vcpu, value2);
return H_SUCCESS;
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
/*
@@ -1267,10 +1321,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
break;
#endif
- case H_RANDOM:
- if (!arch_get_random_seed_longs(&vcpu->arch.regs.gpr[4], 1))
+ case H_RANDOM: {
+ unsigned long rand;
+
+ if (!arch_get_random_seed_longs(&rand, 1))
ret = H_HARDWARE;
+ kvmppc_set_gpr(vcpu, 4, rand);
break;
+ }
case H_RPT_INVALIDATE:
ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
@@ -1370,7 +1428,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
*/
static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
- vcpu->arch.shregs.msr |= MSR_EE;
+ __kvmppc_set_msr_hv(vcpu, __kvmppc_get_msr_hv(vcpu) | MSR_EE);
vcpu->arch.ceded = 1;
smp_mb();
if (vcpu->arch.prodded) {
@@ -1544,7 +1602,7 @@ static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.hfscr_permitted & HFSCR_PM))
return EMULATE_FAIL;
- vcpu->arch.hfscr |= HFSCR_PM;
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PM);
return RESUME_GUEST;
}
@@ -1554,7 +1612,7 @@ static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB))
return EMULATE_FAIL;
- vcpu->arch.hfscr |= HFSCR_EBB;
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_EBB);
return RESUME_GUEST;
}
@@ -1564,7 +1622,7 @@ static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.hfscr_permitted & HFSCR_TM))
return EMULATE_FAIL;
- vcpu->arch.hfscr |= HFSCR_TM;
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM);
return RESUME_GUEST;
}
@@ -1585,7 +1643,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* That can happen due to a bug, or due to a machine check
* occurring at just the wrong time.
*/
- if (vcpu->arch.shregs.msr & MSR_HV) {
+ if (!kvmhv_is_nestedv2() && (__kvmppc_get_msr_hv(vcpu) & MSR_HV)) {
printk(KERN_EMERG "KVM trap in HV mode!\n");
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
vcpu->arch.trap, kvmppc_get_pc(vcpu),
@@ -1636,7 +1694,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* so that it knows that the machine check occurred.
*/
if (!vcpu->kvm->arch.fwnmi_enabled) {
- ulong flags = (vcpu->arch.shregs.msr & 0x083c0000) |
+ ulong flags = (__kvmppc_get_msr_hv(vcpu) & 0x083c0000) |
(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
kvmppc_core_queue_machine_check(vcpu, flags);
r = RESUME_GUEST;
@@ -1666,7 +1724,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* as a result of a hypervisor emulation interrupt
* (e40) getting turned into a 700 by BML RTAS.
*/
- flags = (vcpu->arch.shregs.msr & 0x1f0000ull) |
+ flags = (__kvmppc_get_msr_hv(vcpu) & 0x1f0000ull) |
(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
@@ -1676,7 +1734,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
{
int i;
- if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (!kvmhv_is_nestedv2() && unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
/*
* Guest userspace executed sc 1. This can only be
* reached by the P9 path because the old path
@@ -1754,7 +1812,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
break;
}
- if (!(vcpu->arch.shregs.msr & MSR_DR))
+ if (!(__kvmppc_get_msr_hv(vcpu) & MSR_DR))
vsid = vcpu->kvm->arch.vrma_slb_v;
else
vsid = vcpu->arch.fault_gpa;
@@ -1778,7 +1836,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
long err;
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
- vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
+ vcpu->arch.fault_dsisr = __kvmppc_get_msr_hv(vcpu) &
DSISR_SRR1_MATCH_64S;
if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
/*
@@ -1787,7 +1845,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* hash fault handling below is v3 only (it uses ASDR
* via fault_gpa).
*/
- if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
r = RESUME_PAGE_FAULT;
break;
@@ -1801,7 +1859,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
break;
}
- if (!(vcpu->arch.shregs.msr & MSR_IR))
+ if (!(__kvmppc_get_msr_hv(vcpu) & MSR_IR))
vsid = vcpu->kvm->arch.vrma_slb_v;
else
vsid = vcpu->arch.fault_gpa;
@@ -1863,18 +1921,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* Otherwise, we just generate a program interrupt to the guest.
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
- u64 cause = vcpu->arch.hfscr >> 56;
+ u64 cause = kvmppc_get_hfscr_hv(vcpu) >> 56;
r = EMULATE_FAIL;
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (cause == FSCR_MSGP_LG)
+ switch (cause) {
+ case FSCR_MSGP_LG:
r = kvmppc_emulate_doorbell_instr(vcpu);
- if (cause == FSCR_PM_LG)
+ break;
+ case FSCR_PM_LG:
r = kvmppc_pmu_unavailable(vcpu);
- if (cause == FSCR_EBB_LG)
+ break;
+ case FSCR_EBB_LG:
r = kvmppc_ebb_unavailable(vcpu);
- if (cause == FSCR_TM_LG)
+ break;
+ case FSCR_TM_LG:
r = kvmppc_tm_unavailable(vcpu);
+ break;
+ default:
+ break;
+ }
}
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
@@ -1891,7 +1957,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
kvmppc_dump_regs(vcpu);
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
vcpu->arch.trap, kvmppc_get_pc(vcpu),
- vcpu->arch.shregs.msr);
+ __kvmppc_get_msr_hv(vcpu));
run->hw.hardware_exit_reason = vcpu->arch.trap;
r = RESUME_HOST;
break;
@@ -1915,11 +1981,11 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
* That can happen due to a bug, or due to a machine check
* occurring at just the wrong time.
*/
- if (vcpu->arch.shregs.msr & MSR_HV) {
+ if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) {
pr_emerg("KVM trap in HV mode while nested!\n");
pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
vcpu->arch.trap, kvmppc_get_pc(vcpu),
- vcpu->arch.shregs.msr);
+ __kvmppc_get_msr_hv(vcpu));
kvmppc_dump_regs(vcpu);
return RESUME_HOST;
}
@@ -1976,7 +2042,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
DSISR_SRR1_MATCH_64S;
- if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmhv_nested_page_fault(vcpu);
@@ -1997,36 +2063,9 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
fallthrough; /* go to facility unavailable handler */
#endif
- case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
- u64 cause = vcpu->arch.hfscr >> 56;
-
- /*
- * Only pass HFU interrupts to the L1 if the facility is
- * permitted but disabled by the L1's HFSCR, otherwise
- * the interrupt does not make sense to the L1 so turn
- * it into a HEAI.
- */
- if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
- (vcpu->arch.nested_hfscr & (1UL << cause))) {
- ppc_inst_t pinst;
- vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
-
- /*
- * If the fetch failed, return to guest and
- * try executing it again.
- */
- r = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
- vcpu->arch.emul_inst = ppc_inst_val(pinst);
- if (r != EMULATE_DONE)
- r = RESUME_GUEST;
- else
- r = RESUME_HOST;
- } else {
- r = RESUME_HOST;
- }
-
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+ r = RESUME_HOST;
break;
- }
case BOOK3S_INTERRUPT_HV_RM_HARD:
vcpu->arch.trap = 0;
@@ -2183,6 +2222,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
}
vc->lpcr = new_lpcr;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR);
spin_unlock(&vc->lock);
}
@@ -2207,64 +2247,64 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.dabrx);
break;
case KVM_REG_PPC_DSCR:
- *val = get_reg_val(id, vcpu->arch.dscr);
+ *val = get_reg_val(id, kvmppc_get_dscr_hv(vcpu));
break;
case KVM_REG_PPC_PURR:
- *val = get_reg_val(id, vcpu->arch.purr);
+ *val = get_reg_val(id, kvmppc_get_purr_hv(vcpu));
break;
case KVM_REG_PPC_SPURR:
- *val = get_reg_val(id, vcpu->arch.spurr);
+ *val = get_reg_val(id, kvmppc_get_spurr_hv(vcpu));
break;
case KVM_REG_PPC_AMR:
- *val = get_reg_val(id, vcpu->arch.amr);
+ *val = get_reg_val(id, kvmppc_get_amr_hv(vcpu));
break;
case KVM_REG_PPC_UAMOR:
- *val = get_reg_val(id, vcpu->arch.uamor);
+ *val = get_reg_val(id, kvmppc_get_uamor_hv(vcpu));
break;
case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
i = id - KVM_REG_PPC_MMCR0;
- *val = get_reg_val(id, vcpu->arch.mmcr[i]);
+ *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, i));
break;
case KVM_REG_PPC_MMCR2:
- *val = get_reg_val(id, vcpu->arch.mmcr[2]);
+ *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 2));
break;
case KVM_REG_PPC_MMCRA:
- *val = get_reg_val(id, vcpu->arch.mmcra);
+ *val = get_reg_val(id, kvmppc_get_mmcra_hv(vcpu));
break;
case KVM_REG_PPC_MMCRS:
*val = get_reg_val(id, vcpu->arch.mmcrs);
break;
case KVM_REG_PPC_MMCR3:
- *val = get_reg_val(id, vcpu->arch.mmcr[3]);
+ *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 3));
break;
case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
i = id - KVM_REG_PPC_PMC1;
- *val = get_reg_val(id, vcpu->arch.pmc[i]);
+ *val = get_reg_val(id, kvmppc_get_pmc_hv(vcpu, i));
break;
case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
i = id - KVM_REG_PPC_SPMC1;
*val = get_reg_val(id, vcpu->arch.spmc[i]);
break;
case KVM_REG_PPC_SIAR:
- *val = get_reg_val(id, vcpu->arch.siar);
+ *val = get_reg_val(id, kvmppc_get_siar_hv(vcpu));
break;
case KVM_REG_PPC_SDAR:
- *val = get_reg_val(id, vcpu->arch.sdar);
+ *val = get_reg_val(id, kvmppc_get_sdar_hv(vcpu));
break;
case KVM_REG_PPC_SIER:
- *val = get_reg_val(id, vcpu->arch.sier[0]);
+ *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 0));
break;
case KVM_REG_PPC_SIER2:
- *val = get_reg_val(id, vcpu->arch.sier[1]);
+ *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 1));
break;
case KVM_REG_PPC_SIER3:
- *val = get_reg_val(id, vcpu->arch.sier[2]);
+ *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 2));
break;
case KVM_REG_PPC_IAMR:
- *val = get_reg_val(id, vcpu->arch.iamr);
+ *val = get_reg_val(id, kvmppc_get_iamr_hv(vcpu));
break;
case KVM_REG_PPC_PSPB:
- *val = get_reg_val(id, vcpu->arch.pspb);
+ *val = get_reg_val(id, kvmppc_get_pspb_hv(vcpu));
break;
case KVM_REG_PPC_DPDES:
/*
@@ -2279,22 +2319,31 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
break;
case KVM_REG_PPC_VTB:
- *val = get_reg_val(id, vcpu->arch.vcore->vtb);
+ *val = get_reg_val(id, kvmppc_get_vtb(vcpu));
break;
case KVM_REG_PPC_DAWR:
- *val = get_reg_val(id, vcpu->arch.dawr0);
+ *val = get_reg_val(id, kvmppc_get_dawr0_hv(vcpu));
break;
case KVM_REG_PPC_DAWRX:
- *val = get_reg_val(id, vcpu->arch.dawrx0);
+ *val = get_reg_val(id, kvmppc_get_dawrx0_hv(vcpu));
break;
case KVM_REG_PPC_DAWR1:
- *val = get_reg_val(id, vcpu->arch.dawr1);
+ *val = get_reg_val(id, kvmppc_get_dawr1_hv(vcpu));
break;
case KVM_REG_PPC_DAWRX1:
- *val = get_reg_val(id, vcpu->arch.dawrx1);
+ *val = get_reg_val(id, kvmppc_get_dawrx1_hv(vcpu));
+ break;
+ case KVM_REG_PPC_DEXCR:
+ *val = get_reg_val(id, kvmppc_get_dexcr_hv(vcpu));
+ break;
+ case KVM_REG_PPC_HASHKEYR:
+ *val = get_reg_val(id, kvmppc_get_hashkeyr_hv(vcpu));
+ break;
+ case KVM_REG_PPC_HASHPKEYR:
+ *val = get_reg_val(id, kvmppc_get_hashpkeyr_hv(vcpu));
break;
case KVM_REG_PPC_CIABR:
- *val = get_reg_val(id, vcpu->arch.ciabr);
+ *val = get_reg_val(id, kvmppc_get_ciabr_hv(vcpu));
break;
case KVM_REG_PPC_CSIGR:
*val = get_reg_val(id, vcpu->arch.csigr);
@@ -2306,13 +2355,13 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.tcscr);
break;
case KVM_REG_PPC_PID:
- *val = get_reg_val(id, vcpu->arch.pid);
+ *val = get_reg_val(id, kvmppc_get_pid(vcpu));
break;
case KVM_REG_PPC_ACOP:
*val = get_reg_val(id, vcpu->arch.acop);
break;
case KVM_REG_PPC_WORT:
- *val = get_reg_val(id, vcpu->arch.wort);
+ *val = get_reg_val(id, kvmppc_get_wort_hv(vcpu));
break;
case KVM_REG_PPC_TIDR:
*val = get_reg_val(id, vcpu->arch.tid);
@@ -2338,14 +2387,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
spin_unlock(&vcpu->arch.vpa_update_lock);
break;
case KVM_REG_PPC_TB_OFFSET:
- *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+ *val = get_reg_val(id, kvmppc_get_tb_offset(vcpu));
break;
case KVM_REG_PPC_LPCR:
case KVM_REG_PPC_LPCR_64:
- *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+ *val = get_reg_val(id, kvmppc_get_lpcr(vcpu));
break;
case KVM_REG_PPC_PPR:
- *val = get_reg_val(id, vcpu->arch.ppr);
+ *val = get_reg_val(id, kvmppc_get_ppr_hv(vcpu));
break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case KVM_REG_PPC_TFHAR:
@@ -2414,10 +2463,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
break;
#endif
case KVM_REG_PPC_ARCH_COMPAT:
- *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+ *val = get_reg_val(id, kvmppc_get_arch_compat(vcpu));
break;
case KVM_REG_PPC_DEC_EXPIRY:
- *val = get_reg_val(id, vcpu->arch.dec_expires);
+ *val = get_reg_val(id, kvmppc_get_dec_expires(vcpu));
break;
case KVM_REG_PPC_ONLINE:
*val = get_reg_val(id, vcpu->arch.online);
@@ -2425,6 +2474,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_PTCR:
*val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
break;
+ case KVM_REG_PPC_FSCR:
+ *val = get_reg_val(id, kvmppc_get_fscr_hv(vcpu));
+ break;
default:
r = -EINVAL;
break;
@@ -2453,64 +2505,64 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
break;
case KVM_REG_PPC_DSCR:
- vcpu->arch.dscr = set_reg_val(id, *val);
+ kvmppc_set_dscr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_PURR:
- vcpu->arch.purr = set_reg_val(id, *val);
+ kvmppc_set_purr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SPURR:
- vcpu->arch.spurr = set_reg_val(id, *val);
+ kvmppc_set_spurr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_AMR:
- vcpu->arch.amr = set_reg_val(id, *val);
+ kvmppc_set_amr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_UAMOR:
- vcpu->arch.uamor = set_reg_val(id, *val);
+ kvmppc_set_uamor_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
i = id - KVM_REG_PPC_MMCR0;
- vcpu->arch.mmcr[i] = set_reg_val(id, *val);
+ kvmppc_set_mmcr_hv(vcpu, i, set_reg_val(id, *val));
break;
case KVM_REG_PPC_MMCR2:
- vcpu->arch.mmcr[2] = set_reg_val(id, *val);
+ kvmppc_set_mmcr_hv(vcpu, 2, set_reg_val(id, *val));
break;
case KVM_REG_PPC_MMCRA:
- vcpu->arch.mmcra = set_reg_val(id, *val);
+ kvmppc_set_mmcra_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_MMCRS:
vcpu->arch.mmcrs = set_reg_val(id, *val);
break;
case KVM_REG_PPC_MMCR3:
- *val = get_reg_val(id, vcpu->arch.mmcr[3]);
+ kvmppc_set_mmcr_hv(vcpu, 3, set_reg_val(id, *val));
break;
case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
i = id - KVM_REG_PPC_PMC1;
- vcpu->arch.pmc[i] = set_reg_val(id, *val);
+ kvmppc_set_pmc_hv(vcpu, i, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
i = id - KVM_REG_PPC_SPMC1;
vcpu->arch.spmc[i] = set_reg_val(id, *val);
break;
case KVM_REG_PPC_SIAR:
- vcpu->arch.siar = set_reg_val(id, *val);
+ kvmppc_set_siar_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SDAR:
- vcpu->arch.sdar = set_reg_val(id, *val);
+ kvmppc_set_sdar_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SIER:
- vcpu->arch.sier[0] = set_reg_val(id, *val);
+ kvmppc_set_sier_hv(vcpu, 0, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SIER2:
- vcpu->arch.sier[1] = set_reg_val(id, *val);
+ kvmppc_set_sier_hv(vcpu, 1, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SIER3:
- vcpu->arch.sier[2] = set_reg_val(id, *val);
+ kvmppc_set_sier_hv(vcpu, 2, set_reg_val(id, *val));
break;
case KVM_REG_PPC_IAMR:
- vcpu->arch.iamr = set_reg_val(id, *val);
+ kvmppc_set_iamr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_PSPB:
- vcpu->arch.pspb = set_reg_val(id, *val);
+ kvmppc_set_pspb_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DPDES:
if (cpu_has_feature(CPU_FTR_ARCH_300))
@@ -2519,25 +2571,34 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
break;
case KVM_REG_PPC_VTB:
- vcpu->arch.vcore->vtb = set_reg_val(id, *val);
+ kvmppc_set_vtb(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DAWR:
- vcpu->arch.dawr0 = set_reg_val(id, *val);
+ kvmppc_set_dawr0_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DAWRX:
- vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
+ kvmppc_set_dawrx0_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP);
break;
case KVM_REG_PPC_DAWR1:
- vcpu->arch.dawr1 = set_reg_val(id, *val);
+ kvmppc_set_dawr1_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DAWRX1:
- vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP;
+ kvmppc_set_dawrx1_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP);
+ break;
+ case KVM_REG_PPC_DEXCR:
+ kvmppc_set_dexcr_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_HASHKEYR:
+ kvmppc_set_hashkeyr_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_HASHPKEYR:
+ kvmppc_set_hashpkeyr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_CIABR:
- vcpu->arch.ciabr = set_reg_val(id, *val);
+ kvmppc_set_ciabr_hv(vcpu, set_reg_val(id, *val));
/* Don't allow setting breakpoints in hypervisor code */
- if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
- vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */
+ if ((kvmppc_get_ciabr_hv(vcpu) & CIABR_PRIV) == CIABR_PRIV_HYPER)
+ kvmppc_set_ciabr_hv(vcpu, kvmppc_get_ciabr_hv(vcpu) & ~CIABR_PRIV);
break;
case KVM_REG_PPC_CSIGR:
vcpu->arch.csigr = set_reg_val(id, *val);
@@ -2549,13 +2610,13 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.tcscr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_PID:
- vcpu->arch.pid = set_reg_val(id, *val);
+ kvmppc_set_pid(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_ACOP:
vcpu->arch.acop = set_reg_val(id, *val);
break;
case KVM_REG_PPC_WORT:
- vcpu->arch.wort = set_reg_val(id, *val);
+ kvmppc_set_wort_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_TIDR:
vcpu->arch.tid = set_reg_val(id, *val);
@@ -2602,10 +2663,11 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
* decrementer, which is better than a large one that
* causes a hang.
*/
- if (!vcpu->arch.dec_expires && tb_offset)
- vcpu->arch.dec_expires = get_tb() + tb_offset;
+ kvmppc_set_tb_offset(vcpu, tb_offset);
+ if (!kvmppc_get_dec_expires(vcpu) && tb_offset)
+ kvmppc_set_dec_expires(vcpu, get_tb() + tb_offset);
- vcpu->arch.vcore->tb_offset = tb_offset;
+ kvmppc_set_tb_offset(vcpu, tb_offset);
break;
}
case KVM_REG_PPC_LPCR:
@@ -2615,7 +2677,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
break;
case KVM_REG_PPC_PPR:
- vcpu->arch.ppr = set_reg_val(id, *val);
+ kvmppc_set_ppr_hv(vcpu, set_reg_val(id, *val));
break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case KVM_REG_PPC_TFHAR:
@@ -2686,7 +2748,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DEC_EXPIRY:
- vcpu->arch.dec_expires = set_reg_val(id, *val);
+ kvmppc_set_dec_expires(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_ONLINE:
i = set_reg_val(id, *val);
@@ -2699,6 +2761,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_PTCR:
vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_FSCR:
+ kvmppc_set_fscr_hv(vcpu, set_reg_val(id, *val));
+ break;
default:
r = -EINVAL;
break;
@@ -2916,19 +2981,26 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
vcpu->arch.shared_big_endian = false;
#endif
#endif
- vcpu->arch.mmcr[0] = MMCR0_FC;
+
+ if (kvmhv_is_nestedv2()) {
+ err = kvmhv_nestedv2_vcpu_create(vcpu, &vcpu->arch.nestedv2_io);
+ if (err < 0)
+ return err;
+ }
+
+ kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC);
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
- vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
- vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
+ kvmppc_set_mmcr_hv(vcpu, 0, kvmppc_get_mmcr_hv(vcpu, 0) | MMCR0_PMCCEXT);
+ kvmppc_set_mmcra_hv(vcpu, MMCRA_BHRB_DISABLE);
}
- vcpu->arch.ctrl = CTRL_RUNLATCH;
+ kvmppc_set_ctrl_hv(vcpu, CTRL_RUNLATCH);
/* default to host PVR, since we can't spoof it */
kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
spin_lock_init(&vcpu->arch.vpa_update_lock);
spin_lock_init(&vcpu->arch.tbacct_lock);
vcpu->arch.busy_preempt = TB_NIL;
- vcpu->arch.shregs.msr = MSR_ME;
+ __kvmppc_set_msr_hv(vcpu, MSR_ME);
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
/*
@@ -2938,29 +3010,30 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
* don't set the HFSCR_MSGP bit, and that causes those instructions
* to trap and then we emulate them.
*/
- vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
- HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+ kvmppc_set_hfscr_hv(vcpu, HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
+ HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP);
/* On POWER10 and later, allow prefixed instructions */
if (cpu_has_feature(CPU_FTR_ARCH_31))
- vcpu->arch.hfscr |= HFSCR_PREFIX;
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PREFIX);
if (cpu_has_feature(CPU_FTR_HVMODE)) {
- vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & mfspr(SPRN_HFSCR));
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
- vcpu->arch.hfscr |= HFSCR_TM;
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM);
#endif
}
if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
- vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
+ vcpu->arch.hfscr_permitted = kvmppc_get_hfscr_hv(vcpu);
/*
* PM, EBB, TM are demand-faulted so start with it clear.
*/
- vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM);
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM));
kvmppc_mmu_book3s_hv_init(vcpu);
@@ -3071,6 +3144,8 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_vcpu_free(vcpu, &vcpu->arch.nestedv2_io);
}
static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
@@ -3958,7 +4033,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/* Return to whole-core mode if we split the core earlier */
if (cmd_bit) {
unsigned long hid0 = mfspr(SPRN_HID0);
- unsigned long loops = 0;
hid0 &= ~HID0_POWER8_DYNLPARDIS;
stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
@@ -3970,7 +4044,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (!(hid0 & stat_bit))
break;
cpu_relax();
- ++loops;
}
split_info.do_nap = 0;
}
@@ -4035,10 +4108,207 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
}
}
+/* Helper functions for reading L2's stats from L1's VPA */
+#ifdef CONFIG_PPC_PSERIES
+static DEFINE_PER_CPU(u64, l1_to_l2_cs);
+static DEFINE_PER_CPU(u64, l2_to_l1_cs);
+static DEFINE_PER_CPU(u64, l2_runtime_agg);
+
+int kvmhv_get_l2_counters_status(void)
+{
+ return firmware_has_feature(FW_FEATURE_LPAR) &&
+ get_lppaca()->l2_counters_enable;
+}
+
+void kvmhv_set_l2_counters_status(int cpu, bool status)
+{
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return;
+ if (status)
+ lppaca_of(cpu).l2_counters_enable = 1;
+ else
+ lppaca_of(cpu).l2_counters_enable = 0;
+}
+EXPORT_SYMBOL(kvmhv_set_l2_counters_status);
+
+int kvmhv_counters_tracepoint_regfunc(void)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ kvmhv_set_l2_counters_status(cpu, true);
+ }
+ return 0;
+}
+
+void kvmhv_counters_tracepoint_unregfunc(void)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ kvmhv_set_l2_counters_status(cpu, false);
+ }
+}
+
+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+ struct lppaca *lp = get_lppaca();
+ u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
+ u64 *l1_to_l2_cs_ptr = this_cpu_ptr(&l1_to_l2_cs);
+ u64 *l2_to_l1_cs_ptr = this_cpu_ptr(&l2_to_l1_cs);
+ u64 *l2_runtime_agg_ptr = this_cpu_ptr(&l2_runtime_agg);
+
+ l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
+ l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
+ l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
+ trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - *l1_to_l2_cs_ptr,
+ l2_to_l1_ns - *l2_to_l1_cs_ptr,
+ l2_runtime_ns - *l2_runtime_agg_ptr);
+ *l1_to_l2_cs_ptr = l1_to_l2_ns;
+ *l2_to_l1_cs_ptr = l2_to_l1_ns;
+ *l2_runtime_agg_ptr = l2_runtime_ns;
+ vcpu->arch.l1_to_l2_cs = l1_to_l2_ns;
+ vcpu->arch.l2_to_l1_cs = l2_to_l1_ns;
+ vcpu->arch.l2_runtime_agg = l2_runtime_ns;
+}
+
+u64 kvmhv_get_l1_to_l2_cs_time(void)
+{
+ return tb_to_ns(be64_to_cpu(get_lppaca()->l1_to_l2_cs_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time);
+
+u64 kvmhv_get_l2_to_l1_cs_time(void)
+{
+ return tb_to_ns(be64_to_cpu(get_lppaca()->l2_to_l1_cs_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time);
+
+u64 kvmhv_get_l2_runtime_agg(void)
+{
+ return tb_to_ns(be64_to_cpu(get_lppaca()->l2_runtime_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg);
+
+u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_arch *arch;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (vcpu) {
+ arch = &vcpu->arch;
+ return arch->l1_to_l2_cs;
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time_vcpu);
+
+u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_arch *arch;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (vcpu) {
+ arch = &vcpu->arch;
+ return arch->l2_to_l1_cs;
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time_vcpu);
+
+u64 kvmhv_get_l2_runtime_agg_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_arch *arch;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (vcpu) {
+ arch = &vcpu->arch;
+ return arch->l2_runtime_agg;
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg_vcpu);
+
+#else
+int kvmhv_get_l2_counters_status(void)
+{
+ return 0;
+}
+
+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr, u64 *tb)
+{
+ struct kvmhv_nestedv2_io *io;
+ unsigned long msr, i;
+ int trap;
+ long rc;
+
+ if (vcpu->arch.doorbell_request) {
+ vcpu->arch.doorbell_request = 0;
+ kvmppc_set_dpdes(vcpu, 1);
+ }
+
+ io = &vcpu->arch.nestedv2_io;
+
+ msr = mfmsr();
+ kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending())
+ return 0;
+
+ rc = kvmhv_nestedv2_flush_vcpu(vcpu, time_limit);
+ if (rc < 0)
+ return -EINVAL;
+
+ kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr);
+
+ accumulate_time(vcpu, &vcpu->arch.in_guest);
+ rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id,
+ &trap, &i);
+
+ if (rc != H_SUCCESS) {
+ pr_err("KVM Guest Run VCPU hcall failed\n");
+ if (rc == H_INVALID_ELEMENT_ID)
+ pr_err("KVM: Guest Run VCPU invalid element id at %ld\n", i);
+ else if (rc == H_INVALID_ELEMENT_SIZE)
+ pr_err("KVM: Guest Run VCPU invalid element size at %ld\n", i);
+ else if (rc == H_INVALID_ELEMENT_VALUE)
+ pr_err("KVM: Guest Run VCPU invalid element value at %ld\n", i);
+ return -EINVAL;
+ }
+ accumulate_time(vcpu, &vcpu->arch.guest_exit);
+
+ *tb = mftb();
+ kvmppc_gsm_reset(io->vcpu_message);
+ kvmppc_gsm_reset(io->vcore_message);
+ kvmppc_gsbm_zero(&io->valids);
+
+ rc = kvmhv_nestedv2_parse_output(vcpu);
+ if (rc < 0)
+ return -EINVAL;
+
+ timer_rearm_host_dec(*tb);
+
+ /* Record context switch and guest_run_time data */
+ if (kvmhv_get_l2_counters_status())
+ do_trace_nested_cs_time(vcpu);
+
+ return trap;
+}
+
/* call our hypervisor to load up HV regs and go */
static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long host_psscr;
unsigned long msr;
struct hv_guest_state hvregs;
@@ -4083,6 +4353,15 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
hvregs.hdec_expiry = time_limit;
/*
+ * hvregs has the doorbell status, so zero it here which
+ * enables us to receive doorbells when H_ENTER_NESTED is
+ * in progress for this vCPU
+ */
+
+ if (vcpu->arch.doorbell_request)
+ vcpu->arch.doorbell_request = 0;
+
+ /*
* When setting DEC, we must always deal with irq_work_raise
* via NMI vs setting DEC. The problem occurs right as we
* switch into guest mode if a NMI hits and sets pending work
@@ -4118,7 +4397,7 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
*tb = mftb();
- vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
+ vcpu->arch.dec_expires = dec + (*tb + kvmppc_get_tb_offset(vcpu));
timer_rearm_host_dec(*tb);
@@ -4153,7 +4432,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu_vpa_increment_dispatch(vcpu);
if (kvmhv_on_pseries()) {
- trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+ if (kvmhv_is_nestedv1())
+ trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+ else
+ trap = kvmhv_vcpu_entry_nestedv2(vcpu, time_limit, lpcr, tb);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested &&
@@ -4176,7 +4458,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
__this_cpu_write(cpu_in_guest, NULL);
if (trap == BOOK3S_INTERRUPT_SYSCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ !(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
unsigned long req = kvmppc_get_gpr(vcpu, 3);
/*
@@ -4655,16 +4937,33 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
- if (vcpu->arch.shregs.msr & MSR_EE) {
- if (xive_interrupt_pending(vcpu))
+ if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+ &vcpu->arch.pending_exceptions) ||
+ xive_interrupt_pending(vcpu)) {
+ /*
+ * For nested HV, don't synthesize but always pass MER,
+ * the L0 will be able to optimise that more
+ * effectively than manipulating registers directly.
+ */
+ if (!kvmhv_on_pseries() && (__kvmppc_get_msr_hv(vcpu) & MSR_EE))
kvmppc_inject_interrupt_hv(vcpu,
- BOOK3S_INTERRUPT_EXTERNAL, 0);
- } else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
- &vcpu->arch.pending_exceptions)) {
- lpcr |= LPCR_MER;
+ BOOK3S_INTERRUPT_EXTERNAL, 0);
+ else
+ lpcr |= LPCR_MER;
+ } else {
+ /*
+ * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit
+ * unexpectedly set - for e.g. during NMI handling when all register
+ * states are synchronized from L0 to L1. L1 needs to inform L0 about
+ * MER=1 only when there are pending external interrupts.
+ * In the above if check, MER bit is set if there are pending
+ * external interrupts. Hence, explicitly mask off MER bit
+ * here as otherwise it may generate spurious interrupts in L2 KVM
+ * causing an endless loop, which results in L2 guest getting hung.
+ */
+ lpcr &= ~LPCR_MER;
}
} else if (vcpu->arch.pending_exceptions ||
- vcpu->arch.doorbell_request ||
xive_interrupt_pending(vcpu)) {
vcpu->arch.ret = RESUME_HOST;
goto out;
@@ -4677,7 +4976,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
tb = mftb();
- kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset);
+ kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + kvmppc_get_tb_offset(vcpu));
trace_kvm_guest_enter(vcpu);
@@ -4844,7 +5143,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
msr |= MSR_VSX;
if ((cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
- (vcpu->arch.hfscr & HFSCR_TM))
+ (kvmppc_get_hfscr_hv(vcpu) & HFSCR_TM))
msr |= MSR_TM;
msr = msr_check_and_set(msr);
@@ -4868,7 +5167,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
accumulate_time(vcpu, &vcpu->arch.hcall);
- if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (!kvmhv_is_nestedv2() && WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
/*
* These should have been caught reflected
* into the guest by now. Final sanity check:
@@ -5133,6 +5432,14 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
if (++cores_done >= kvm->arch.online_vcores)
break;
}
+
+ if (kvmhv_is_nestedv2()) {
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR);
+ }
+ }
}
void kvmppc_setup_partition_table(struct kvm *kvm)
@@ -5399,15 +5706,43 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/* Allocate the guest's logical partition ID */
- lpid = kvmppc_alloc_lpid();
- if ((long)lpid < 0)
- return -ENOMEM;
- kvm->arch.lpid = lpid;
+ if (!kvmhv_is_nestedv2()) {
+ lpid = kvmppc_alloc_lpid();
+ if ((long)lpid < 0)
+ return -ENOMEM;
+ kvm->arch.lpid = lpid;
+ }
kvmppc_alloc_host_rm_ops();
kvmhv_vm_nested_init(kvm);
+ if (kvmhv_is_nestedv2()) {
+ long rc;
+ unsigned long guest_id;
+
+ rc = plpar_guest_create(0, &guest_id);
+
+ if (rc != H_SUCCESS)
+ pr_err("KVM: Create Guest hcall failed, rc=%ld\n", rc);
+
+ switch (rc) {
+ case H_PARAMETER:
+ case H_FUNCTION:
+ case H_STATE:
+ return -EINVAL;
+ case H_NOT_ENOUGH_RESOURCES:
+ case H_ABORTED:
+ return -ENOMEM;
+ case H_AUTHORITY:
+ return -EPERM;
+ case H_NOT_AVAILABLE:
+ return -EBUSY;
+ }
+ kvm->arch.lpid = guest_id;
+ }
+
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@@ -5477,7 +5812,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
lpcr |= LPCR_HAIL;
ret = kvmppc_init_vm_radix(kvm);
if (ret) {
- kvmppc_free_lpid(kvm->arch.lpid);
+ if (kvmhv_is_nestedv2())
+ plpar_guest_delete(0, kvm->arch.lpid);
+ else
+ kvmppc_free_lpid(kvm->arch.lpid);
return ret;
}
kvmppc_setup_partition_table(kvm);
@@ -5567,10 +5905,16 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvm->arch.process_table = 0;
if (kvm->arch.secure_guest)
uv_svm_terminate(kvm->arch.lpid);
- kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
+ if (!kvmhv_is_nestedv2())
+ kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
- kvmppc_free_lpid(kvm->arch.lpid);
+ if (kvmhv_is_nestedv2()) {
+ kvmhv_flush_lpid(kvm->arch.lpid);
+ plpar_guest_delete(0, kvm->arch.lpid);
+ } else {
+ kvmppc_free_lpid(kvm->arch.lpid);
+ }
kvmppc_free_pimap(kvm);
}
@@ -5697,7 +6041,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
* the underlying calls, which will EOI the interrupt in real
* mode, need an HW IRQ number mapped in the XICS IRQ domain.
*/
- host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq);
+ host_data = irq_domain_get_irq_data(irq_get_default_domain(), host_irq);
irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data);
if (i == pimap->n_mapped)
@@ -5982,6 +6326,8 @@ static int kvmhv_enable_nested(struct kvm *kvm)
return -ENODEV;
if (!radix_enabled())
return -ENODEV;
+ if (kvmhv_is_nestedv2())
+ return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
if (kvm)
@@ -6084,7 +6430,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
}
srcu_idx = srcu_read_lock(&kvm->srcu);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
struct kvm_memory_slot *memslot;
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
int bkt;
@@ -6185,7 +6531,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.unmap_gfn_range = kvm_unmap_gfn_range_hv,
.age_gfn = kvm_age_gfn_hv,
.test_age_gfn = kvm_test_age_gfn_hv,
- .set_spte_gfn = kvm_set_spte_gfn_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
@@ -6196,10 +6541,6 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
-#ifdef CONFIG_KVM_XICS
- .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
- .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
-#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
@@ -6317,6 +6658,22 @@ static int kvmppc_book3s_init_hv(void)
return r;
}
+#if defined(CONFIG_KVM_XICS)
+ /*
+ * IRQ bypass is supported only for interrupts whose EOI operations are
+ * handled via OPAL calls. Therefore, register IRQ bypass handlers
+ * exclusively for PowerNV KVM when booted with 'xive=off', indicating
+ * the use of the emulated XICS interrupt controller.
+ */
+ if (!kvmhv_on_pseries()) {
+ pr_info("KVM-HV: Enabling IRQ bypass\n");
+ kvm_ops_hv.irq_bypass_add_producer =
+ kvmppc_irq_bypass_add_producer_hv;
+ kvm_ops_hv.irq_bypass_del_producer =
+ kvmppc_irq_bypass_del_producer_hv;
+ }
+#endif
+
kvm_ops_hv.owner = THIS_MODULE;
kvmppc_hv_ops = &kvm_ops_hv;
@@ -6341,6 +6698,7 @@ static void kvmppc_book3s_exit_hv(void)
module_init(kvmppc_book3s_init_hv);
module_exit(kvmppc_book3s_exit_hv);
+MODULE_DESCRIPTION("KVM on Book3S (POWER8 and later) in hypervisor mode");
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");