From 234ff0b729ad882d20f7996591a964965647addf Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Fri, 16 Nov 2018 21:28:18 +1100
Subject: KVM: PPC: Book3S HV: Fix race between kvm_unmap_hva_range and MMU mode switch

Testing has revealed an occasional crash which appears to be caused
by a race between kvmppc_switch_mmu_to_hpt and kvm_unmap_hva_range_hv.
The symptom is a NULL pointer dereference in __find_linux_pte() called
from kvm_unmap_radix() with kvm->arch.pgtable == NULL.

Looking at kvmppc_switch_mmu_to_hpt(), it does indeed clear
kvm->arch.pgtable (via kvmppc_free_radix()) before setting
kvm->arch.radix to NULL, and there is nothing to prevent
kvm_unmap_hva_range_hv() or the other MMU callback functions from
being called concurrently with kvmppc_switch_mmu_to_hpt() or
kvmppc_switch_mmu_to_radix().

This patch therefore adds calls to spin_lock/unlock on the kvm->mmu_lock
around the assignments to kvm->arch.radix, and makes sure that the
partition-scoped radix tree or HPT is only freed after changing
kvm->arch.radix.

This also takes the kvm->mmu_lock in kvmppc_rmap_reset() to make sure
that the clearing of each rmap array (one per memslot) doesn't happen
concurrently with use of the array in the kvm_unmap_hva_range_hv()
or the other MMU callbacks.

Fixes: 18c3640cefc7 ("KVM: PPC: Book3S HV: Add infrastructure for running HPT guests on radix host")
Cc: stable@vger.kernel.org # v4.15+
Signed-off-by: Paul Mackerras
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/powerpc/kvm/book3s_64_mmu_hv.c')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c615617e78ac..a18afda3d0f0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm)
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	slots = kvm_memslots(kvm);
 	kvm_for_each_memslot(memslot, slots) {
+		/* Mutual exclusion with kvm_unmap_hva_range etc. */
+		spin_lock(&kvm->mmu_lock);
 		/*
 		 * This assumes it is acceptable to lose reference and
 		 * change bits across a reset.
 		 */
 		memset(memslot->arch.rmap, 0,
 		       memslot->npages * sizeof(*memslot->arch.rmap));
+		spin_unlock(&kvm->mmu_lock);
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
-- 
cgit