From e641a317830b6bd26e6dc2ef5fe2c1c181dd5cc2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Oct 2017 16:39:19 +1100 Subject: KVM: PPC: Book3S HV: Unify dirty page map between HPT and radix Currently, the HPT code in HV KVM maintains a dirty bit per guest page in the rmap array, whether or not dirty page tracking has been enabled for the memory slot. In contrast, the radix code maintains a dirty bit per guest page in memslot->dirty_bitmap, and only does so when dirty page tracking has been enabled. This changes the HPT code to maintain the dirty bits in the memslot dirty_bitmap like radix does. This results in slightly less code overall, and will mean that we do not lose the dirty bits when transitioning between HPT and radix mode in future. There is one minor change to behaviour as a result. With HPT, when dirty tracking was enabled for a memslot, we would previously clear all the dirty bits at that point (both in the HPT entries and in the rmap arrays), meaning that a KVM_GET_DIRTY_LOG ioctl immediately following would show no pages as dirty (assuming no vcpus have run in the meantime). With this change, the dirty bits on HPT entries are not cleared at the point where dirty tracking is enabled, so KVM_GET_DIRTY_LOG would show as dirty any guest pages that are resident in the HPT and dirty. This is consistent with what happens on radix. This also fixes a bug in the mark_pages_dirty() function for radix (in the sense that the function no longer exists). In the case where a large page of 64 normal pages or more is marked dirty, the addressing of the dirty bitmap was incorrect and could write past the end of the bitmap. Fortunately this case was never hit in practice because a 2MB large page is only 32 x 64kB pages, and we don't support backing the guest with 1GB huge pages at this point. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 50 ++++++---------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) (limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index c5d7435455f1..6336b13ed233 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -474,26 +474,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; } -static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot, - unsigned long gfn, unsigned int order) -{ - unsigned long i, limit; - unsigned long *dp; - - if (!memslot->dirty_bitmap) - return; - limit = 1ul << order; - if (limit < BITS_PER_LONG) { - for (i = 0; i < limit; ++i) - mark_page_dirty(kvm, gfn + i); - return; - } - dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn); - limit /= BITS_PER_LONG; - for (i = 0; i < limit; ++i) - *dp++ = ~0ul; -} - /* Called with kvm->lock held */ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) @@ -508,12 +488,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); - if (old & _PAGE_DIRTY) { - if (!shift) - mark_page_dirty(kvm, gfn); - else - mark_pages_dirty(kvm, memslot, - gfn, shift - PAGE_SHIFT); + if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { + unsigned long npages = 1; + if (shift) + npages = 1ul << (shift - PAGE_SHIFT); + kvmppc_update_dirty_map(memslot, gfn, npages); } } return 0; @@ -579,20 +558,8 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map) { unsigned long i, j; - unsigned long n, *p; int npages; - /* - * Radix accumulates dirty bits in the first half of the - * memslot's dirty_bitmap area, for when pages are paged - * out or modified by the host directly. Pick up these - * bits and add them to the map. - */ - n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long); - p = memslot->dirty_bitmap; - for (i = 0; i < n; ++i) - map[i] |= xchg(&p[i], 0); - for (i = 0; i < memslot->npages; i = j) { npages = kvm_radix_test_clear_dirty(kvm, memslot, i); @@ -604,9 +571,10 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, * real address, if npages > 1 we can skip to i + npages. */ j = i + 1; - if (npages) - for (j = i; npages; ++j, --npages) - __set_bit_le(j, map); + if (npages) { + set_dirty_bits(map, i, npages); + i = j + npages; + } } return 0; } -- cgit From 18c3640cefc7f1c6986b7be48f5013a8d5e394cb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 13 Sep 2017 16:00:10 +1000 Subject: KVM: PPC: Book3S HV: Add infrastructure for running HPT guests on radix host This sets up the machinery for switching a guest between HPT (hashed page table) and radix MMU modes, so that in future we can run a HPT guest on a radix host on POWER9 machines. * The KVM_PPC_CONFIGURE_V3_MMU ioctl can now specify either HPT or radix mode, on a radix host. * The KVM_CAP_PPC_MMU_HASH_V3 capability now returns 1 on POWER9 with HV KVM on a radix host. * The KVM_PPC_GET_SMMU_INFO returns information about the HPT MMU on a radix host. * The KVM_PPC_ALLOCATE_HTAB ioctl on a radix host will switch the guest to HPT mode and allocate a HPT. * For simplicity, we now allocate the rmap array for each memslot, even on a radix host, since it will be needed if the guest switches to HPT mode. * Since we cannot yet run a HPT guest on a radix host, the KVM_RUN ioctl will return an EINVAL error in that case. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kvm/book3s_64_mmu_radix.c') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 6336b13ed233..58618f644c56 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -662,6 +662,7 @@ void kvmppc_free_radix(struct kvm *kvm) pgd_clear(pgd); } pgd_free(kvm->mm, kvm->arch.pgtable); + kvm->arch.pgtable = NULL; } static void pte_ctor(void *addr) -- cgit