summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_64_mmu_radix.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c')
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c107
1 files changed, 68 insertions, 39 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index aa41183d2a97..3cb0c9843d01 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -11,12 +11,12 @@
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/debugfs.h>
+#include <linux/pgtable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/ultravisor.h>
@@ -514,13 +514,14 @@ void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
unsigned long ig;
for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+ p4d_t *p4d = p4d_offset(pgd, 0);
pud_t *pud;
- if (!pgd_present(*pgd))
+ if (!p4d_present(*p4d))
continue;
- pud = pud_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
kvmppc_unmap_free_pud(kvm, pud, lpid);
- pgd_clear(pgd);
+ p4d_clear(p4d);
}
}
@@ -581,6 +582,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
unsigned long *rmapp, struct rmap_nested **n_rmap)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud, *new_pud = NULL;
pmd_t *pmd, *new_pmd = NULL;
pte_t *ptep, *new_ptep = NULL;
@@ -588,9 +590,11 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
/* Traverse the guest's 2nd-level tree, allocate new levels needed */
pgd = pgtable + pgd_index(gpa);
+ p4d = p4d_offset(pgd, gpa);
+
pud = NULL;
- if (pgd_present(*pgd))
- pud = pud_offset(pgd, gpa);
+ if (p4d_present(*p4d))
+ pud = pud_offset(p4d, gpa);
else
new_pud = pud_alloc_one(kvm->mm, gpa);
@@ -611,13 +615,13 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
/* Now traverse again under the lock and change the tree */
ret = -ENOMEM;
- if (pgd_none(*pgd)) {
+ if (p4d_none(*p4d)) {
if (!new_pud)
goto out_unlock;
- pgd_populate(kvm->mm, pgd, new_pud);
+ p4d_populate(kvm->mm, p4d, new_pud);
new_pud = NULL;
}
- pud = pud_offset(pgd, gpa);
+ pud = pud_offset(p4d, gpa);
if (pud_is_leaf(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
@@ -750,7 +754,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
return ret;
}
-bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
unsigned long gpa, unsigned int lpid)
{
unsigned long pgflags;
@@ -765,12 +769,12 @@ bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
pgflags = _PAGE_ACCESSED;
if (writing)
pgflags |= _PAGE_DIRTY;
- /*
- * We are walking the secondary (partition-scoped) page table here.
- * We can do this without disabling irq because the Linux MM
- * subsystem doesn't do THP splits and collapses on this tree.
- */
- ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
+
+ if (nested)
+ ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
+ else
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+
if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
return true;
@@ -806,7 +810,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
* is that the page is writable.
*/
hva = gfn_to_hva_memslot(memslot, gfn);
- if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
upgrade_write = true;
} else {
unsigned long pfn;
@@ -828,12 +832,12 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
* Read the PTE from the process' radix tree and use that
* so we get the shift and attribute bits.
*/
- local_irq_disable();
- ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ spin_lock(&kvm->mmu_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
pte = __pte(0);
if (ptep)
- pte = *ptep;
- local_irq_enable();
+ pte = READ_ONCE(*ptep);
+ spin_unlock(&kvm->mmu_lock);
/*
* If the PTE disappeared temporarily due to a THP
* collapse, just return and let the guest try again.
@@ -964,8 +968,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
/* Failed to set the reference/change bits */
if (dsisr & DSISR_SET_RC) {
spin_lock(&kvm->mmu_lock);
- if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
- writing, gpa, kvm->arch.lpid))
+ if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+ gpa, kvm->arch.lpid))
dsisr &= ~DSISR_SET_RC;
spin_unlock(&kvm->mmu_lock);
@@ -996,11 +1000,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
return 0;
}
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
kvm->arch.lpid);
- return 0;
+ return 0;
}
/* Called with kvm->mmu_lock held */
@@ -1016,7 +1020,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return ref;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
gpa, shift);
@@ -1043,7 +1047,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return ref;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
return ref;
@@ -1055,7 +1059,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
{
unsigned long gfn = memslot->base_gfn + pagenum;
unsigned long gpa = gfn << PAGE_SHIFT;
- pte_t *ptep;
+ pte_t *ptep, pte;
unsigned int shift;
int ret = 0;
unsigned long old, *rmapp;
@@ -1063,12 +1067,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return ret;
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
- if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
- ret = 1;
- if (shift)
- ret = 1 << (shift - PAGE_SHIFT);
+ /*
+ * For performance reasons we don't hold kvm->mmu_lock while walking the
+ * partition scoped table.
+ */
+ ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
+ if (!ptep)
+ return 0;
+
+ pte = READ_ONCE(*ptep);
+ if (pte_present(pte) && pte_dirty(pte)) {
spin_lock(&kvm->mmu_lock);
+ /*
+ * Recheck the pte again
+ */
+ if (pte_val(pte) != pte_val(*ptep)) {
+ /*
+ * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
+ * only find PAGE_SIZE pte entries here. We can continue
+ * to use the pte addr returned by above page table
+ * walk.
+ */
+ if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
+ spin_unlock(&kvm->mmu_lock);
+ return 0;
+ }
+ }
+
+ ret = 1;
+ VM_BUG_ON(shift);
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
gpa, shift);
kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
@@ -1124,7 +1151,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
gpa = memslot->base_gfn << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
for (n = memslot->npages; n; --n) {
- ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
kvm->arch.lpid);
@@ -1240,7 +1267,8 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
unsigned long gpa;
pgd_t *pgt;
struct kvm_nested_guest *nested;
- pgd_t pgd, *pgdp;
+ pgd_t *pgdp;
+ p4d_t p4d, *p4dp;
pud_t pud, *pudp;
pmd_t pmd, *pmdp;
pte_t *ptep;
@@ -1313,13 +1341,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
}
pgdp = pgt + pgd_index(gpa);
- pgd = READ_ONCE(*pgdp);
- if (!(pgd_val(pgd) & _PAGE_PRESENT)) {
- gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE;
+ p4dp = p4d_offset(pgdp, gpa);
+ p4d = READ_ONCE(*p4dp);
+ if (!(p4d_val(p4d) & _PAGE_PRESENT)) {
+ gpa = (gpa & P4D_MASK) + P4D_SIZE;
continue;
}
- pudp = pud_offset(&pgd, gpa);
+ pudp = pud_offset(&p4d, gpa);
pud = READ_ONCE(*pudp);
if (!(pud_val(pud) & _PAGE_PRESENT)) {
gpa = (gpa & PUD_MASK) + PUD_SIZE;