From 870cfe77a91e91cac5937784d73b9d27b6a12296 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 19 Jul 2017 14:49:26 +1000 Subject: powerpc/mm: Update definitions of DSISR bits This updates the definitions for the various DSISR bits to match both some historical stuff and to match new bits on POWER9. In addition, we define some masks corresponding to the "bad" faults on Book3S, and some masks corresponding to the bits that match between DSISR and SRR1 for a DSI and an ISI. This comes with a small code update to change the definition of DSISR_PGDIRFAULT which becomes DSISR_PRTABLE_FAULT to match architecture 3.0B Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f6b3e67c5762..6d677c79eeb1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -322,13 +322,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa = vcpu->arch.fault_gpa & ~0xfffUL; gpa &= ~0xF000000000000000ul; gfn = gpa >> PAGE_SHIFT; - if (!(dsisr & DSISR_PGDIRFAULT)) + if (!(dsisr & DSISR_PRTABLE_FAULT)) gpa |= ea & 0xfff; memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { - if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS | + if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS | DSISR_SET_RC)) { /* * Bad address in guest page table tree, or other -- cgit From 94171b19c3f1f4d9d4c0e3aaa1aa161def1ec7ea Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 27 Jul 2017 11:54:53 +0530 Subject: powerpc/mm: Rename find_linux_pte_or_hugepte() Add newer helpers to make the function usage simpler. It is always recommended to use find_current_mm_pte() for walking the page table. If we cannot use find_current_mm_pte(), it should be documented why the said usage of __find_linux_pte() is safe against a parallel THP split. For now we have KVM code using __find_linux_pte(). This is because kvm code ends up calling __find_linux_pte() in real mode with MSR_EE=0 but with PACA soft_enabled = 1. We may want to fix that later and make sure we keep the MSR_EE and PACA soft_enabled in sync. When we do that we can switch kvm to use find_linux_pte(). Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 5 +++-- arch/powerpc/kvm/book3s_64_mmu_radix.c | 28 ++++++++++++++-------------- arch/powerpc/kvm/book3s_64_vio_hv.c | 12 +++++++++++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 18 +++++++++--------- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- 5 files changed, 39 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cb0190e2a73..4b219db39c47 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "trace_hv.h" @@ -597,8 +598,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f6b3e67c5762..7d719c8aa0bb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Supported radix tree geometry. @@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (writing) pgflags |= _PAGE_DIRTY; local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, - NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); if (ptep) { pte = READ_ONCE(*ptep); if (pte_present(pte) && @@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, spin_unlock(&kvm->mmu_lock); return RESUME_GUEST; } - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, - gpa, NULL, &shift); + /* + * We are walking the secondary page table here. We can do this + * without disabling irq. + */ + ptep = __find_linux_pte(kvm->arch.pgtable, + gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); @@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pgflags |= _PAGE_WRITE; } else { local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) pgflags |= _PAGE_WRITE; local_irq_restore(flags); @@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; unsigned long old; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, gpa, shift); @@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, unsigned int shift; int ret = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { ret = 1; if (shift) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 3adfd2f5301c..c32e9bfe75b1 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_BUG @@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, pte_t *ptep, pte; unsigned shift = 0; - ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); + /* + * Called in real mode with MSR_EE = 0. We are safe here. + * It is ok to do the lookup with arch.pgdir here, because + * we are doing this on secondary cpus and current task there + * is not the hypervisor. Also this is safe against THP in the + * host, because an IPI to primary thread will wait for the secondary + * to exit which will agains result in the below page table walk + * to finish. + */ + ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); if (!ptep || !pte_present(*ptep)) return -ENXIO; pte = *ptep; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 584c74c8119f..fedb0139524c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) @@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x) /* * assume we don't have huge pages in vmalloc space... * So don't worry about THP collapse/split. Called - * Only in realmode, hence won't need irq_save/restore. + * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. */ - p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL); + p = find_init_mm_pte(addr, NULL); if (!p || !pte_present(*p)) return NULL; addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); @@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * If we had a page table table change after lookup, we would * retry via mmu_notifier_retry. */ - if (realmode) - ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - else { + if (!realmode) local_irq_save(irq_flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - } + /* + * If called in real mode we have MSR_EE = 0. Otherwise + * we disable irq above. + */ + ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 77fd043b3ecc..c6c734424c70 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "e500.h" #include "timing.h" @@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, NULL); if (ptep) { pte_t pte = READ_ONCE(*ptep); -- cgit From 94a04bc25a2c6296bd0c5e82c10e8231c2b11f77 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 25 Aug 2017 14:30:33 +1000 Subject: KVM: PPC: Book3S HV: POWER9 does not require secondary thread management POWER9 CPUs have independent MMU contexts per thread, so KVM does not need to quiesce secondary threads, so the hwthread_req/hwthread_state protocol does not have to be used. So patch it away on POWER9, and patch away the branch from the Linux idle wakeup to kvm_start_guest that is never used. Add a warning and error out of kvmppc_grab_hwthread in case it is ever called on POWER9. This avoids a hwsync in the idle wakeup path on POWER9. Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras [mpe: Use WARN(...) instead of WARN_ON()/pr_err(...)] Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv.c | 13 ++++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0b436df746fc..8bad44b46dc8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2111,6 +2111,15 @@ static int kvmppc_grab_hwthread(int cpu) struct paca_struct *tpaca; long timeout = 10000; + /* + * ISA v3.0 idle routines do not set hwthread_state or test + * hwthread_req, so they can not grab idle threads. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + WARN(1, "KVM: can not control sibling threads\n"); + return -EBUSY; + } + tpaca = &paca[cpu]; /* Ensure the thread won't go into the kernel if it wakes */ @@ -2145,10 +2154,12 @@ static void kvmppc_release_hwthread(int cpu) struct paca_struct *tpaca; tpaca = &paca[cpu]; - tpaca->kvm_hstate.hwthread_req = 0; tpaca->kvm_hstate.kvm_vcpu = NULL; tpaca->kvm_hstate.kvm_vcore = NULL; tpaca->kvm_hstate.kvm_split_mode = NULL; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + tpaca->kvm_hstate.hwthread_req = 0; + } static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cb44065e2946..eacf3d06eb75 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -149,9 +149,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) subf r4, r4, r3 mtspr SPRN_DEC, r4 +BEGIN_FTR_SECTION /* hwthread_req may have got set by cede or no vcpu, so clear it */ li r0, 0 stb r0, HSTATE_HWTHREAD_REQ(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* * For external interrupts we need to call the Linux @@ -314,6 +316,7 @@ kvm_novcpu_exit: * Relocation is off and most register values are lost. * r13 points to the PACA. * r3 contains the SRR1 wakeup value, SRR1 is trashed. + * This is not used by ISAv3.0B processors. */ .globl kvm_start_guest kvm_start_guest: @@ -432,6 +435,9 @@ kvm_secondary_got_guest: * While waiting we also need to check if we get given a vcpu to run. */ kvm_no_guest: +BEGIN_FTR_SECTION + twi 31,0,0 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r3, HSTATE_HWTHREAD_REQ(r13) cmpwi r3, 0 bne 53f @@ -2466,8 +2472,10 @@ kvm_do_nap: clrrdi r0, r0, 1 mtspr SPRN_CTRLT, r0 +BEGIN_FTR_SECTION li r0,1 stb r0,HSTATE_HWTHREAD_REQ(r13) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 BEGIN_FTR_SECTION -- cgit