diff options
Diffstat (limited to 'arch/arm64/kvm/mmu.c')
-rw-r--r-- | arch/arm64/kvm/mmu.c | 153 |
1 files changed, 101 insertions, 52 deletions
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2feb6c6b63af..1c78864767c5 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, return 0; } -static bool kvm_is_device_pfn(unsigned long pfn) -{ - return !pfn_is_map_memory(pfn); -} - static void *stage2_memcache_zalloc_page(void *arg) { struct kvm_mmu_memory_cache *mc = arg; @@ -1304,6 +1299,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, if (map_size == PAGE_SIZE) return true; + /* pKVM only supports PMD_SIZE huge-mappings */ + if (is_protected_kvm_enabled() && map_size != PMD_SIZE) + return false; + size = memslot->npages * PAGE_SIZE; gpa_start = memslot->base_gfn << PAGE_SHIFT; @@ -1466,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) return vma->vm_flags & VM_MTE_ALLOWED; } +static bool kvm_vma_is_cacheable(struct vm_area_struct *vma) +{ + switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) { + case MT_NORMAL_NC: + case MT_DEVICE_nGnRnE: + case MT_DEVICE_nGnRE: + return false; + default: + return true; + } +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, @@ -1473,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, { int ret = 0; bool write_fault, writable, force_pte = false; - bool exec_fault, mte_allowed; - bool device = false, vfio_allow_any_uc = false; + bool exec_fault, mte_allowed, is_vma_cacheable; + bool s2_force_noncacheable = false, vfio_allow_any_uc = false; unsigned long mmu_seq; phys_addr_t ipa = fault_ipa; struct kvm *kvm = vcpu->kvm; @@ -1488,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; struct page *page; + vm_flags_t vm_flags; enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED; if (fault_is_perm) @@ -1501,6 +1513,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + if (!is_protected_kvm_enabled()) + memcache = &vcpu->arch.mmu_page_cache; + else + memcache = &vcpu->arch.pkvm_memcache; + /* * Permission faults just need to update the existing leaf entry, * and so normally don't require allocations from the memcache. The @@ -1510,13 +1527,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!fault_is_perm || (logging_active && write_fault)) { int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - if (!is_protected_kvm_enabled()) { - memcache = &vcpu->arch.mmu_page_cache; + if (!is_protected_kvm_enabled()) ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - } else { - memcache = &vcpu->arch.pkvm_memcache; + else ret = topup_hyp_memcache(memcache, min_pages); - } + if (ret) return ret; } @@ -1537,7 +1552,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * logging_active is guaranteed to never be true for VM_PFNMAP * memslots. */ - if (logging_active || is_protected_kvm_enabled()) { + if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; } else { @@ -1612,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED; + vm_flags = vma->vm_flags; + + is_vma_cacheable = kvm_vma_is_cacheable(vma); + /* Don't use the VMA after the unlock -- it may have vanished */ vma = NULL; @@ -1635,18 +1654,39 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (is_error_noslot_pfn(pfn)) return -EFAULT; - if (kvm_is_device_pfn(pfn)) { - /* - * If the page was identified as device early by looking at - * the VMA flags, vma_pagesize is already representing the - * largest quantity we can map. If instead it was mapped - * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE - * and must not be upgraded. - * - * In both cases, we don't let transparent_hugepage_adjust() - * change things at the last minute. - */ - device = true; + /* + * Check if this is non-struct page memory PFN, and cannot support + * CMOs. It could potentially be unsafe to access as cachable. + */ + if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) { + if (is_vma_cacheable) { + /* + * Whilst the VMA owner expects cacheable mapping to this + * PFN, hardware also has to support the FWB and CACHE DIC + * features. + * + * ARM64 KVM relies on kernel VA mapping to the PFN to + * perform cache maintenance as the CMO instructions work on + * virtual addresses. VM_PFNMAP region are not necessarily + * mapped to a KVA and hence the presence of hardware features + * S2FWB and CACHE DIC are mandatory to avoid the need for + * cache maintenance. + */ + if (!kvm_supports_cacheable_pfnmap()) + return -EFAULT; + } else { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via __kvm_faultin_pfn(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ + s2_force_noncacheable = true; + } } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -1655,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, writable = false; } - if (exec_fault && device) + if (exec_fault && s2_force_noncacheable) return -ENOEXEC; /* @@ -1688,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { + if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) { if (fault_is_perm && fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; else @@ -1702,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } } - if (!fault_is_perm && !device && kvm_has_mte(kvm)) { + if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ if (mte_allowed) { sanitise_mte_tags(kvm, pfn, vma_pagesize); @@ -1718,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - if (device) { + if (s2_force_noncacheable) { if (vfio_allow_any_uc) prot |= KVM_PGTABLE_PROT_NORMAL_NC; else @@ -1794,9 +1834,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; + /* Synchronous External Abort? */ + if (kvm_vcpu_abt_issea(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (kvm_handle_guest_sea()) + return kvm_inject_serror(vcpu); + + return 1; + } + esr = kvm_vcpu_get_esr(vcpu); + /* + * The fault IPA should be reliable at this point as we're not dealing + * with an SEA. + */ ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm)) + return -EFAULT; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (esr_fsc_is_translation_fault(esr)) { @@ -1810,26 +1869,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) { fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - if (is_iabt) - kvm_inject_pabt(vcpu, fault_ipa); - else - kvm_inject_dabt(vcpu, fault_ipa); - return 1; + return kvm_inject_sea(vcpu, is_iabt, fault_ipa); } } - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. - */ - if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) - kvm_inject_vabt(vcpu); - - return 1; - } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); @@ -1898,8 +1941,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } if (kvm_vcpu_abt_iss1tw(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; + ret = kvm_inject_sea_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); goto out_unlock; } @@ -1944,10 +1986,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) if (ret == 0) ret = 1; out: - if (ret == -ENOEXEC) { - kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - ret = 1; - } + if (ret == -ENOEXEC) + ret = kvm_inject_sea_iabt(vcpu, kvm_vcpu_get_hfar(vcpu)); out_unlock: srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; @@ -2207,6 +2247,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ret = -EINVAL; break; } + + /* + * Cacheable PFNMAP is allowed only if the hardware + * supports it. + */ + if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) { + ret = -EINVAL; + break; + } } hva = min(reg_end, vma->vm_end); } while (hva < reg_end); |