From 4b99412ed6972cc77c1f16009e1d00323fcef9ab Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Tue, 5 May 2020 12:47:16 +0530
Subject: powerpc/kvm/book3s: Add helper to walk partition scoped linux page
 table.

The locking rules for walking a partition scoped table are different from
those for a process scoped table. Hence add a helper for the secondary linux
page table walk, and also add a check that we are holding the right locks.

Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20200505071729.54912-10-aneesh.kumar@linux.ibm.com
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index aa12cd4078b3..c92d413eeaaf 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -981,11 +981,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		return 0;
 	}

-	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep))
 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 				 kvm->arch.lpid);
-	return 0;
+	return 0;
 }

 /* Called with kvm->mmu_lock held */
@@ -1001,7 +1001,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
 		return ref;

-	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
 					      gpa, shift);
@@ -1028,7 +1028,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
 		return ref;

-	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep) && pte_young(*ptep))
 		ref = 1;
 	return ref;
@@ -1048,7 +1048,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
 		return ret;

-	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
 		ret = 1;
 		if (shift)
@@ -1109,7 +1109,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
 	gpa = memslot->base_gfn << PAGE_SHIFT;
 	spin_lock(&kvm->mmu_lock);
 	for (n = memslot->npages; n; --n) {
-		ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 		if (ptep && pte_present(*ptep))
 			kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 					 kvm->arch.lpid);
--
cgit
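For context: the helper introduced above, find_kvm_secondary_pte(), is added to
arch/powerpc/include/asm/kvm_book3s_64.h, which is not shown in this per-file
view (limited to book3s_64_mmu_radix.c). A minimal sketch of the pattern —
assert the lock, then do the walk — assuming the real body follows what the
commit message and the later warning text describe:

/*
 * Sketch of the secondary (partition scoped) walk helper; the exact
 * body is an assumption, since its definition is outside this file.
 */
static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea,
					    unsigned *hshift)
{
	pte_t *pte;

	/* Partition scoped walks must be done under kvm->mmu_lock. */
	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
		"%s called with kvm mmu_lock not held \n", __func__);
	pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);

	return pte;
}

The VM_WARN string is what surfaces in the migration warning fixed by the last
patch in this series, which is how the lock check pays off in practice.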
From 6cdf30375f82fbc1d30252096440265426c0993c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Tue, 5 May 2020 12:47:18 +0530
Subject: powerpc/kvm/book3s: Use kvm helpers to walk shadow or secondary table

Update kvmppc_hv_handle_set_rc to use find_kvm_nested_guest_pte and
find_kvm_secondary_pte.

Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20200505071729.54912-12-aneesh.kumar@linux.ibm.com
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index c92d413eeaaf..70c4025406d8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -735,7 +735,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
 	return ret;
 }

-bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
 			     unsigned long gpa, unsigned int lpid)
 {
 	unsigned long pgflags;
@@ -750,12 +750,12 @@ bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
 	pgflags = _PAGE_ACCESSED;
 	if (writing)
 		pgflags |= _PAGE_DIRTY;
-	/*
-	 * We are walking the secondary (partition-scoped) page table here.
-	 * We can do this without disabling irq because the Linux MM
-	 * subsystem doesn't do THP splits and collapses on this tree.
-	 */
-	ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
+
+	if (nested)
+		ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
+	else
+		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+
 	if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
 		kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
 		return true;
@@ -949,8 +949,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* Failed to set the reference/change bits */
 	if (dsisr & DSISR_SET_RC) {
 		spin_lock(&kvm->mmu_lock);
-		if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
-					    writing, gpa, kvm->arch.lpid))
+		if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+					    gpa, kvm->arch.lpid))
 			dsisr &= ~DSISR_SET_RC;
 		spin_unlock(&kvm->mmu_lock);
--
cgit

From bda3deaa6fc800218b6b59213e8da644e52787dc Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Tue, 5 May 2020 12:47:24 +0530
Subject: powerpc/kvm/book3s: use find_kvm_host_pte in
 kvmppc_book3s_instantiate_page

Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20200505071729.54912-18-aneesh.kumar@linux.ibm.com
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 70c4025406d8..271f1c3d8443 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -813,12 +813,12 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 	 * Read the PTE from the process' radix tree and use that
 	 * so we get the shift and attribute bits.
 	 */
-	local_irq_disable();
-	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
 	pte = __pte(0);
 	if (ptep)
-		pte = *ptep;
-	local_irq_enable();
+		pte = READ_ONCE(*ptep);
+	spin_unlock(&kvm->mmu_lock);
 	/*
 	 * If the PTE disappeared temporarily due to a THP
 	 * collapse, just return and let the guest try again.
--
cgit
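The find_kvm_host_pte() helper used above comes from an earlier patch in the
same series and also lives in arch/powerpc/include/asm/kvm_book3s_64.h, so its
body is not visible in this per-file view. The key idea is that a host
(process scoped) walk is only trustworthy if no mmu notifier invalidation has
run since mmu_seq was sampled. A rough sketch, with the details treated as
assumptions:

/*
 * Rough sketch of the host (process scoped) walk helper; details are
 * assumed, since the definition is not part of this diff.
 */
static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq,
				       unsigned long ea, unsigned *hshift)
{
	pte_t *pte;

	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
		"%s called with kvm mmu_lock not held \n", __func__);

	/* The walk is stale if an mmu notifier invalidation ran meanwhile. */
	if (mmu_notifier_retry(kvm, mmu_seq))
		return NULL;

	pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift);

	return pte;
}

Holding kvm->mmu_lock across the walk and the sequence recheck is what lets
the caller READ_ONCE() the pte safely without disabling interrupts, which is
why the patch can drop the local_irq_disable()/enable() pair.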
--
cgit

From bf8036a4098d1548cdccf9ed5c523ef4e83e3c68 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Thu, 28 May 2020 13:34:56 +0530
Subject: powerpc/book3s64/kvm: Fix secondary page table walk warning during
 migration

This patch fixes the below warning reported during migration:

find_kvm_secondary_pte called with kvm mmu_lock not held
CPU: 23 PID: 5341 Comm: qemu-system-ppc Tainted: G W 5.7.0-rc5-kvm-00211-g9ccf10d6d088 #432
NIP: c008000000fe848c LR: c008000000fe8488 CTR: 0000000000000000
REGS: c000001e19f077e0 TRAP: 0700 Tainted: G W (5.7.0-rc5-kvm-00211-g9ccf10d6d088)
MSR: 9000000000029033 CR: 42222422 XER: 20040000
CFAR: c00000000012f5ac IRQMASK: 0
GPR00: c008000000fe8488 c000001e19f07a70 c008000000ffe200 0000000000000039
GPR04: 0000000000000001 c000001ffc8b4900 0000000000018840 0000000000000007
GPR08: 0000000000000003 0000000000000001 0000000000000007 0000000000000001
GPR12: 0000000000002000 c000001fff6d9400 000000011f884678 00007fff70b70000
GPR16: 00007fff7137cb90 00007fff7dcb4410 0000000000000001 0000000000000000
GPR20: 000000000ffe0000 0000000000000000 0000000000000001 0000000000000000
GPR24: 8000000000000000 0000000000000001 c000001e1f67e600 c000001e1fd82410
GPR28: 0000000000001000 c000001e2e410000 0000000000000fff 0000000000000ffe
NIP [c008000000fe848c] kvmppc_hv_get_dirty_log_radix+0x2e4/0x340 [kvm_hv]
LR [c008000000fe8488] kvmppc_hv_get_dirty_log_radix+0x2e0/0x340 [kvm_hv]
Call Trace:
[c000001e19f07a70] [c008000000fe8488] kvmppc_hv_get_dirty_log_radix+0x2e0/0x340 [kvm_hv] (unreliable)
[c000001e19f07b50] [c008000000fd42e4] kvm_vm_ioctl_get_dirty_log_hv+0x33c/0x3c0 [kvm_hv]
[c000001e19f07be0] [c008000000eea878] kvm_vm_ioctl_get_dirty_log+0x30/0x50 [kvm]
[c000001e19f07c00] [c008000000edc818] kvm_vm_ioctl+0x2b0/0xc00 [kvm]
[c000001e19f07d50] [c00000000046e148] ksys_ioctl+0xf8/0x150
[c000001e19f07da0] [c00000000046e1c8] sys_ioctl+0x28/0x80
[c000001e19f07dc0] [c00000000003652c] system_call_exception+0x16c/0x240
[c000001e19f07e20] [c00000000000d070] system_call_common+0xf0/0x278
Instruction dump:
7d3a512a 4200ffd0 7ffefb78 4bfffdc4 60000000 3c820000 e8848468 3c620000
e86384a8 38840010 4800673d e8410018 <0fe00000> 4bfffdd4 60000000 60000000

Reported-by: Paul Mackerras
Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20200528080456.87797-1-aneesh.kumar@linux.ibm.com
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 35 ++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 271f1c3d8443..954fd7a12149 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1040,7 +1040,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 {
 	unsigned long gfn = memslot->base_gfn + pagenum;
 	unsigned long gpa = gfn << PAGE_SHIFT;
-	pte_t *ptep;
+	pte_t *ptep, pte;
 	unsigned int shift;
 	int ret = 0;
 	unsigned long old, *rmapp;
@@ -1048,12 +1048,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
 		return ret;

-	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
-	if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
-		ret = 1;
-		if (shift)
-			ret = 1 << (shift - PAGE_SHIFT);
+	/*
+	 * For performance reasons we don't hold kvm->mmu_lock while walking the
+	 * partition scoped table.
+	 */
+	ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
+	if (!ptep)
+		return 0;
+
+	pte = READ_ONCE(*ptep);
+	if (pte_present(pte) && pte_dirty(pte)) {
 		spin_lock(&kvm->mmu_lock);
+		/*
+		 * Recheck the pte again
+		 */
+		if (pte_val(pte) != pte_val(*ptep)) {
+			/*
+			 * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
+			 * only find PAGE_SIZE pte entries here. We can continue
+			 * to use the pte addr returned by above page table
+			 * walk.
+			 */
+			if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
+				spin_unlock(&kvm->mmu_lock);
+				return 0;
+			}
+		}
+
+		ret = 1;
+		VM_BUG_ON(shift);
 		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
 					      gpa, shift);
 		kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
--
cgit
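The fix works because the dirty-log path does not need a stable walk, only a
stable pte value: it reads the pte outside the lock and then revalidates it
under kvm->mmu_lock before modifying anything. The unlocked walker is
essentially find_kvm_secondary_pte() without the lock assertion; a sketch,
with the exact body treated as an assumption since the helper is defined in
arch/powerpc/include/asm/kvm_book3s_64.h rather than in this file:

/*
 * Sketch of the lockless variant added by this fix; assumed details.
 * Per the comment removed in the earlier kvmppc_hv_handle_set_rc patch,
 * the Linux MM subsystem doesn't do THP splits and collapses on the
 * partition scoped tree, so a caller that rechecks the pte value under
 * kvm->mmu_lock (as kvm_radix_test_clear_dirty() does above) can walk
 * without the lock.
 */
static inline pte_t *find_kvm_secondary_pte_unlocked(struct kvm *kvm,
						     unsigned long ea,
						     unsigned *hshift)
{
	pte_t *pte;

	pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);

	return pte;
}

The VM_BUG_ON(shift) in the patch encodes the same reasoning as the in-diff
comment: with KVM_MEM_LOG_DIRTY_PAGES enabled only PAGE_SIZE ptes can appear
here, so the ptep obtained from the unlocked walk remains usable after the
recheck.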