From b6ae256afd32f96bec0117175b329d0dd617655e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 12 Dec 2019 20:50:55 +0100 Subject: KVM: arm64: Only sign-extend MMIO up to register width On AArch64 you can do a sign-extended load to either a 32-bit or 64-bit register, and we should only sign extend the register up to the width of the register as specified in the operation (by using the 32-bit Wn or 64-bit Xn register specifier). As it turns out, the architecture provides this decoding information in the SF ("Sixty-Four" -- how cute...) bit. Let's take advantage of this with the usual 32-bit/64-bit header file dance and do the right thing on AArch64 hosts. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20191212195055.5541-1-christoffer.dall@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/include/asm/kvm_mmio.h | 2 ++ arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/include/asm/kvm_mmio.h | 6 ++---- virt/kvm/arm/mmio.c | 6 ++++++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9b118516d2db..fe55d8737a11 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -182,6 +182,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 7c0eddb0adb2..32fbf82e3ebc 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -14,6 +14,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void 
kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5efe5ca8fecf..f407b6bdad2e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -283,6 +283,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 02b5c48fd467..b204501a0c39 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -10,13 +10,11 @@ #include #include -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. 
- */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Witdth of the register accessed by the faulting instruction is 64-bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 70d3b449692c..1bb71acd53f2 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -105,6 +105,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) data = (data ^ mask) - mask; } + if (!vcpu->arch.mmio_decode.sixty_four) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); @@ -125,6 +128,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) unsigned long rt; int access_size; bool sign_extend; + bool sixty_four; if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ @@ -138,11 +142,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); + sixty_four = kvm_vcpu_dabt_issf(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; + vcpu->arch.mmio_decode.sixty_four = sixty_four; return 0; } -- cgit From 8c58be34494b7f1b2adb446e2d8beeb90e5de65b Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 13 Dec 2019 10:42:37 +0100 Subject: KVM: arm/arm64: vgic-its: Fix restoration of unmapped collections Saving/restoring an unmapped collection is a valid scenario. For example this happens if a MAPTI command was sent, featuring an unmapped collection. At the moment the CTE fails to be restored. Only compare against the number of online vcpus if the rdist base is set. 
Fixes: ea1ad53e1e31a ("KVM: arm64: vgic-its: Collection table save/restore") Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20191213094237.19627-1-eric.auger@redhat.com --- virt/kvm/arm/vgic/vgic-its.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 98c7360d9fb7..17920d1b350a 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -2475,7 +2475,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); coll_id = val & KVM_ITS_CTE_ICID_MASK; - if (target_addr >= atomic_read(&kvm->online_vcpus)) + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) return -EINVAL; collection = find_collection(its, coll_id); -- cgit From 5f675c56ed262103b825cbab0e96c34fe681318d Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Fri, 20 Dec 2019 19:18:33 +0800 Subject: KVM: arm/arm64: vgic: Handle GICR_PENDBASER.PTZ field as RAZ Although guest will hardly read and use the PTZ (Pending Table Zero) bit in GICR_PENDBASER, let us emulate the architecture strictly. As per IHI 0069E 9.11.30, PTZ field is WO, and reads as 0. 
Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20191220111833.1422-1-yuzenghui@huawei.com --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 7dfd15dbb308..ebc218840fc2 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -414,8 +414,11 @@ static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 value = vgic_cpu->pendbaser; - return extract_bytes(vgic_cpu->pendbaser, addr & 7, len); + value &= ~GICR_PENDBASER_PTZ; + + return extract_bytes(value, addr & 7, len); } static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, -- cgit From f5523423defb0d929e23813c8dd16c0131043a8c Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 28 Dec 2019 11:57:14 +0000 Subject: arm64: kvm: Fix IDMAP overlap with HYP VA Booting 5.4 on LX2160A reveals that KVM is non-functional: kvm: Limiting the IPA size due to kernel Virtual Address limit kvm [1]: IPA Size Limit: 43bits kvm [1]: IDMAP intersecting with HYP VA, unable to continue kvm [1]: error initializing Hyp mode: -22 Debugging shows: kvm [1]: IDMAP page: 81a26000 kvm [1]: HYP VA range: 0:22ffffffff as RAM is located at: 80000000-fbdfffff : System RAM 2080000000-237fffffff : System RAM Comparing this with the same kernel on Armada 8040 shows: kvm: Limiting the IPA size due to kernel Virtual Address limit kvm [1]: IPA Size Limit: 43bits kvm [1]: IDMAP page: 2a26000 kvm [1]: HYP VA range: 4800000000:493fffffff ... kvm [1]: Hyp mode initialized successfully which indicates that hyp_va_msb is set, and is always set to the opposite value of the idmap page to avoid the overlap. This does not happen with the LX2160A. 
Further debugging shows vabits_actual = 39, kva_msb = 38 on LX2160A and kva_msb = 33 on Armada 8040. Looking at the bit layout of the HYP VA, there is still one bit available for hyp_va_msb. Set this bit appropriately. This allows KVM to be functional on the LX2160A, but without any HYP VA randomisation: kvm: Limiting the IPA size due to kernel Virtual Address limit kvm [1]: IPA Size Limit: 43bits kvm [1]: IDMAP page: 81a24000 kvm [1]: HYP VA range: 4000000000:62ffffffff ... kvm [1]: Hyp mode initialized successfully Fixes: ed57cac83e05 ("arm64: KVM: Introduce EL2 VA randomisation") Signed-off-by: Russell King [maz: small additional cleanups, preserved case where the tag is legitimately 0 and we can just use the mask, Fixes tag] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/E1ilAiY-0000MA-RG@rmk-PC.armlinux.org.uk --- arch/arm64/kvm/va_layout.c | 56 +++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index dab1fea4752a..a4f48c1ac28c 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -13,52 +13,46 @@ #include /* - * The LSB of the random hyp VA tag or 0 if no randomization is used. + * The LSB of the HYP VA tag */ static u8 tag_lsb; /* - * The random hyp VA tag value with the region bit if hyp randomization is used + * The HYP VA tag value with the region bit */ static u64 tag_val; static u64 va_mask; +/* + * We want to generate a hyp VA with the following format (with V == + * vabits_actual): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + * |--------- tag_val -----------|----- va_mask ---| + * + * which does not conflict with the idmap regions. 
+ */ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; - int kva_msb; /* Where is my RAM region? */ hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); hyp_va_msb ^= BIT(vabits_actual - 1); - kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (vabits_actual - 1)) { - /* - * No space in the address, let's compute the mask so - * that it covers (vabits_actual - 1) bits, and the region - * bit. The tag stays set to zero. - */ - va_mask = BIT(vabits_actual - 1) - 1; - va_mask |= hyp_va_msb; - } else { - /* - * We do have some free bits to insert a random tag. - * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == vabits_actual): - * - * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 - * --------------------------------------------------------- - * | 0000000 | hyp_va_msb | random tag | kern linear VA | - */ - tag_lsb = kva_msb; - va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); - tag_val |= hyp_va_msb; - tag_val >>= tag_lsb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = hyp_va_msb; + + if (tag_lsb != (vabits_actual - 1)) { + /* We have some free bits to insert a random tag. */ + tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } + tag_val >>= tag_lsb; } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -117,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt, * VHE doesn't need any address translation, let's NOP * everything. * - * Alternatively, if we don't have any spare bits in - * the address, NOP everything after masking that - * kernel VA. + * Alternatively, if the tag is zero (because the layout + * dictates it and we don't have any spare bits in the + * address), NOP everything after masking the kernel VA. 
*/ - if (has_vhe() || (!tag_lsb && i > 0)) { + if (has_vhe() || (!tag_val && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } -- cgit From c3e35409b54e8833ab936e667e3e7fcb8bdace00 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Mon, 2 Dec 2019 15:42:11 +0800 Subject: KVM: ARM: Call hyp_cpu_pm_exit at the right place It doesn't need to call hyp_cpu_pm_exit() in init_hyp_mode() when some error occurs. hyp_cpu_pm_exit() only needs to be called in kvm_arch_init() if init_subsystems() fails. So move hyp_cpu_pm_exit() out from teardown_hyp_mode() and call it directly in kvm_arch_init(). Signed-off-by: Shannon Zhao Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1575272531-3204-1-git-send-email-shannon.zhao@linux.alibaba.com --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 8de4daf25097..b5d57ed6443c 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1537,7 +1537,6 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - hyp_cpu_pm_exit(); } /** @@ -1751,6 +1750,7 @@ int kvm_arch_init(void *opaque) return 0; out_hyp: + hyp_cpu_pm_exit(); if (!in_hyp_mode) teardown_hyp_mode(); out_err: -- cgit From de9375634b1ef49091004d08e5cd4f68695adf0f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 13 Nov 2019 09:40:45 +0800 Subject: KVM: arm: Remove duplicate include Remove duplicate header which is included twice. 
Signed-off-by: YueHaibing Signed-off-by: Marc Zyngier Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20191113014045.15276-1-yuehaibing@huawei.com --- virt/kvm/arm/arm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index b5d57ed6443c..efda376ab3c5 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -20,8 +20,6 @@ #include #include #include -#include -#include #define CREATE_TRACE_POINTS #include "trace.h" -- cgit From 1559b7583ff6ed018c5320d1503fa80b435775f0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 17 Dec 2019 12:38:09 +0000 Subject: KVM: arm/arm64: Re-check VMA on detecting a poisoned page When we check for a poisoned page, we use the VMA to tell userspace about the looming disaster. But we pass a pointer to this VMA after having released the mmap_sem, which isn't a good idea. Instead, stash the shift value that goes with this pfn while we are holding the mmap_sem. Reported-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Link: https://lore.kernel.org/r/20191211165651.7889-3-maz@kernel.org Link: https://lore.kernel.org/r/20191217123809.197392-1-james.morse@arm.com --- virt/kvm/arm/mmu.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 0b32a904a1bb..e3ad95013192 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1596,16 +1596,8 @@ static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) __invalidate_icache_guest_page(pfn, size); } -static void kvm_send_hwpoison_signal(unsigned long address, - struct vm_area_struct *vma) +static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { - short lsb; - - if (is_vm_hugetlb_page(vma)) - lsb = huge_page_shift(hstate_vma(vma)); - else - lsb = PAGE_SHIFT; - send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); } @@ -1678,6 +1670,7 @@ static int 
user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; + short vma_shift; kvm_pfn_t pfn; pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); @@ -1701,7 +1694,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - vma_pagesize = vma_kernel_pagesize(vma); + if (is_vm_hugetlb_page(vma)) + vma_shift = huge_page_shift(hstate_vma(vma)); + else + vma_shift = PAGE_SHIFT; + + vma_pagesize = 1ULL << vma_shift; if (logging_active || (vma->vm_flags & VM_PFNMAP) || !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { @@ -1741,7 +1739,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(hva, vma); + kvm_send_hwpoison_signal(hva, vma_shift); return 0; } if (is_error_noslot_pfn(pfn)) -- cgit From a425372e733177eb0779748956bc16c85167af48 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 8 Jan 2020 13:43:22 +0000 Subject: KVM: arm64: Correct PSTATE on exception entry When KVM injects an exception into a guest, it generates the PSTATE value from scratch, configuring PSTATE.{M[4:0],DAIF}, and setting all other bits to zero. This isn't correct, as the architecture specifies that some PSTATE bits are (conditionally) cleared or set upon an exception, and others are unchanged from the original context. This patch adds logic to match the architectural behaviour. To make this simple to follow/audit/extend, documentation references are provided, and bits are configured in order of their layout in SPSR_EL2. This layout can be seen in the diagram on ARM DDI 0487E.a page C5-429. 
Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200108134324.46500-2-mark.rutland@arm.com --- arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kvm/inject_fault.c | 70 +++++++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e2004..d1bb5b69f1ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index ccdb6a051ab2..6aafc2825c1c 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include #include -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. 
+ */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. 
+ + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* -- cgit From 3c2483f15499b877ccb53250d88addb8c91da147 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 8 Jan 2020 13:43:23 +0000 Subject: KVM: arm/arm64: Correct CPSR on exception entry When KVM injects an exception into a guest, it generates the CPSR value from scratch, configuring CPSR.{M,A,I,T,E}, and setting all other bits to zero. This isn't correct, as the architecture specifies that some CPSR bits are (conditionally) cleared or set upon an exception, and others are unchanged from the original context. This patch adds logic to match the architectural behaviour. To make this simple to follow/audit/extend, documentation references are provided, and bits are configured in order of their layout in SPSR_EL2. This layout can be seen in the diagram on ARM DDI 0487E.a page C5-426. Note that this code is used by both arm and arm64, and is intended to function with the SPSR_EL2 and SPSR_HYP layouts. 
Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200108134324.46500-3-mark.rutland@arm.com --- arch/arm/include/asm/kvm_emulate.h | 12 ++++ arch/arm64/include/asm/ptrace.h | 1 + virt/kvm/arm/aarch32.c | 111 +++++++++++++++++++++++++++++++++---- 3 files changed, 114 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index fe55d8737a11..c488c629e6c8 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -14,13 +14,25 @@ #include /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae20..bf57308fcd63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index c4c57ba99e90..773cf1439081 100644 --- a/virt/kvm/arm/aarch32.c +++ 
b/virt/kvm/arm/aarch32.c @@ -10,6 +10,7 @@ * Author: Christoffer Dall */ +#include #include #include #include @@ -28,22 +29,112 @@ static const u8 return_offsets[8][2] = { [7] = { 4, 4 }, /* FIQ, unused */ }; +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); 
+ + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - unsigned long cpsr; unsigned long new_spsr_value = *vcpu_cpsr(vcpu); bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - cpsr = mode | PSR_AA32_I_BIT; - - if (sctlr & (1 << 30)) - cpsr |= PSR_AA32_T_BIT; - if (sctlr & (1 << 25)) - cpsr |= PSR_AA32_E_BIT; - - *vcpu_cpsr(vcpu) = cpsr; + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); /* Note: These now point to the banked copies */ vcpu_write_spsr(vcpu, new_spsr_value); @@ -84,7 +175,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, 
PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); *far = addr; -- cgit From 1cfbb484de158e378e8971ac40f3082e53ecca55 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 8 Jan 2020 13:43:24 +0000 Subject: KVM: arm/arm64: Correct AArch32 SPSR on exception entry Confusingly, there are three SPSR layouts that a kernel may need to deal with: (1) An AArch64 SPSR_ELx view of an AArch64 pstate (2) An AArch64 SPSR_ELx view of an AArch32 pstate (3) An AArch32 SPSR_* view of an AArch32 pstate When the KVM AArch32 support code deals with SPSR_{EL2,HYP}, it's either dealing with #2 or #3 consistently. On arm64 the PSR_AA32_* definitions match the AArch64 SPSR_ELx view, and on arm the PSR_AA32_* definitions match the AArch32 SPSR_* view. However, when we inject an exception into an AArch32 guest, we have to synthesize the AArch32 SPSR_* that the guest will see. Thus, an AArch64 host needs to synthesize layout #3 from layout #2. This patch adds a new host_spsr_to_spsr32() helper for this, and makes use of it in the KVM AArch32 support code. For arm64 we need to shuffle the DIT bit around, and remove the SS bit, while for arm we can use the value as-is. I've open-coded the bit manipulation for now to avoid having to rework the existing PSR_* definitions into PSR64_AA32_* and PSR32_AA32_* definitions. I hope to perform a more thorough refactoring in future so that we can handle pstate view manipulation more consistently across the kernel tree. 
Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200108134324.46500-4-mark.rutland@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/include/asm/kvm_emulate.h | 32 ++++++++++++++++++++++++++++++++ virt/kvm/arm/aarch32.c | 6 +++--- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index c488c629e6c8..08d9805f613b 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -53,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index f407b6bdad2e..53ea7637b7b2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -219,6 +219,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. 
+ */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 773cf1439081..631d397ac81b 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -129,15 +129,15 @@ static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, new_spsr_value); + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ -- cgit From 821c10c2ae0bac5a8503cc7e961e7af90ea676eb Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 14 Jan 2020 19:22:12 +0800 Subject: KVM: arm/arm64: vgic-its: Properly check the unmapped coll in DISCARD handler Discard is supposed to fail if the collection is not mapped to any target redistributor. We currently check if the collection is mapped by "ite->collection" but this is incomplete (e.g., mapping a LPI to an unmapped collection also results in a non NULL ite->collection). What actually needs to be checked is its_is_collection_mapped(), let's turn to it. Also take this chance to remove an extra blank line. 
Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20200114112212.1411-1-yuzenghui@huawei.com --- virt/kvm/arm/vgic/vgic-its.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 17920d1b350a..d53d34a33e35 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -839,9 +839,8 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, u32 event_id = its_cmd_get_id(its_cmd); struct its_ite *ite; - ite = find_ite(its, device_id, event_id); - if (ite && ite->collection) { + if (ite && its_is_collection_mapped(ite->collection)) { /* * Though the spec talks about removing the pending state, we * don't bother here since we clear the ITTE anyway and the -- cgit From 31a9b0b11b1c5264433a4fa1e1e1e8aa03954b1c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sun, 19 Jan 2020 17:06:04 +0800 Subject: KVM: arm/arm64: vgic: Drop the kvm_vgic_register_mmio_region() kvm_vgic_register_mmio_region() was introduced in commit 4493b1c4866a ("KVM: arm/arm64: vgic-new: Add MMIO handling framework") but never used, and never even implemented. Remove it to avoid confusing readers.
Reported-by: Haibin Wang Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200119090604.398-1-yuzenghui@huawei.com --- virt/kvm/arm/vgic/vgic-mmio.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 836f418f1ee8..5af2aefad435 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -98,11 +98,6 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .uaccess_write = uwr, \ } -int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, - struct vgic_register_region *reg_desc, - struct vgic_io_device *region, - int nr_irqs, bool offset_private); - unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, -- cgit From 0e20f5e25556c00ee813469d373b00abcf298708 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Dec 2019 13:25:25 +0000 Subject: KVM: arm/arm64: Cleanup MMIO handling Our MMIO handling is a bit odd, in the sense that it uses an intermediate per-vcpu structure to store the various decoded information that describe the access. But the same information is readily available in the HSR/ESR_EL2 field, and we actually use this field to populate the structure. Let's simplify the whole thing by getting rid of the superfluous structure and save a (tiny) bit of space in the vcpu structure. 
[32bit fix courtesy of Olof Johansson ] Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_emulate.h | 5 ++- arch/arm/include/asm/kvm_host.h | 12 ++++--- arch/arm/include/asm/kvm_hyp.h | 1 + arch/arm/include/asm/kvm_mmio.h | 28 --------------- arch/arm64/include/asm/kvm_emulate.h | 3 +- arch/arm64/include/asm/kvm_host.h | 12 ++++--- arch/arm64/include/asm/kvm_mmio.h | 27 -------------- virt/kvm/arm/mmio.c | 70 ++++++++++++------------------------ virt/kvm/arm/mmu.c | 1 - 9 files changed, 42 insertions(+), 117 deletions(-) delete mode 100644 arch/arm/include/asm/kvm_mmio.h delete mode 100644 arch/arm64/include/asm/kvm_mmio.h diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 08d9805f613b..3944305e81df 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -220,7 +219,7 @@ static inline bool kvm_vcpu_dabt_is_cm(struct kvm_vcpu *vcpu) } /* Get Access Size from a data abort */ -static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) { switch ((kvm_vcpu_get_hsr(vcpu) >> 22) & 0x3) { case 0: @@ -231,7 +230,7 @@ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) return 4; default: kvm_err("Hardware is weird: SAS 0b11 is reserved\n"); - return -EFAULT; + return 4; } } diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 556cd818eccf..bd2233805d99 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -202,9 +201,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -300,6 +296,14 @@ int handle_exit(struct 
kvm_vcpu *vcpu, struct kvm_run *run, static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) {} +/* MMIO helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 40e9034db601..3c1b55ecc578 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define __hyp_text __section(.hyp.text) notrace diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h deleted file mode 100644 index 32fbf82e3ebc..000000000000 --- a/arch/arm/include/asm/kvm_mmio.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#ifndef __ARM_KVM_MMIO_H__ -#define __ARM_KVM_MMIO_H__ - -#include -#include -#include - -struct kvm_decode { - unsigned long rt; - bool sign_extend; - /* Not used on 32-bit arm */ - bool sixty_four; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM_KVM_MMIO_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 53ea7637b7b2..688c63412cc2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ 
b/arch/arm64/include/asm/kvm_emulate.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -341,7 +340,7 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); } -static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c61260cf63c5..f6a77ddab956 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -325,9 +324,6 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* IO related fields */ - struct kvm_decode mmio_decode; - /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; @@ -491,6 +487,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +/* MMIO helpers */ +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa); + int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h deleted file mode 100644 index b204501a0c39..000000000000 --- a/arch/arm64/include/asm/kvm_mmio.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#ifndef 
__ARM64_KVM_MMIO_H__ -#define __ARM64_KVM_MMIO_H__ - -#include -#include - -struct kvm_decode { - unsigned long rt; - bool sign_extend; - /* Witdth of the register accessed by the faulting instruction is 64-bits */ - bool sixty_four; -}; - -void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); -unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); - -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); - -#endif /* __ARM64_KVM_MMIO_H__ */ diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 1bb71acd53f2..aedfcff99ac5 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -5,7 +5,6 @@ */ #include -#include #include #include @@ -92,26 +91,23 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; - if (!run->mmio.is_write) { - len = run->mmio.len; - if (len > sizeof(unsigned long)) - return -EINVAL; - + if (!kvm_vcpu_dabt_iswrite(vcpu)) { + len = kvm_vcpu_dabt_get_as(vcpu); data = kvm_mmio_read_buf(run->mmio.data, len); - if (vcpu->arch.mmio_decode.sign_extend && + if (kvm_vcpu_dabt_issext(vcpu) && len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); data = (data ^ mask) - mask; } - if (!vcpu->arch.mmio_decode.sixty_four) + if (!kvm_vcpu_dabt_issf(vcpu)) data = data & 0xffffffff; trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); - vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); } /* @@ -123,36 +119,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) -{ - unsigned long rt; - int access_size; - bool sign_extend; - bool sixty_four; - - if (kvm_vcpu_dabt_iss1tw(vcpu)) { - /* page table accesses IO mem: tell guest to fix its TTBR */ - 
kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - - access_size = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(access_size < 0)) - return access_size; - - *is_write = kvm_vcpu_dabt_iswrite(vcpu); - sign_extend = kvm_vcpu_dabt_issext(vcpu); - sixty_four = kvm_vcpu_dabt_issf(vcpu); - rt = kvm_vcpu_dabt_get_rd(vcpu); - - *len = access_size; - vcpu->arch.mmio_decode.sign_extend = sign_extend; - vcpu->arch.mmio_decode.rt = rt; - vcpu->arch.mmio_decode.sixty_four = sixty_four; - - return 0; -} - int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { @@ -164,15 +130,10 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, u8 data_buf[8]; /* - * Prepare MMIO operation. First decode the syndrome data we get - * from the CPU. Then try if some in-kernel emulation feels - * responsible, otherwise let user space do its magic. + * No valid syndrome? Ask userspace for help if it has + * voluntered to do so, and bail out otherwise. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, &is_write, &len); - if (ret) - return ret; - } else { + if (!kvm_vcpu_dabt_isvalid(vcpu)) { if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { run->exit_reason = KVM_EXIT_ARM_NISV; run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); @@ -184,7 +145,20 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, return -ENOSYS; } - rt = vcpu->arch.mmio_decode.rt; + /* Page table accesses IO mem: tell guest to fix its TTBR */ + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + + /* + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. 
+ */ + is_write = kvm_vcpu_dabt_iswrite(vcpu); + len = kvm_vcpu_dabt_get_as(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); if (is_write) { data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index e3ad95013192..a4fa81d75e84 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include -- cgit From 290a6bb06de9ec24cecbb11bf4be35411d0b2625 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 20 Jan 2020 14:08:25 +0100 Subject: arm64: KVM: Add UAPI notes for swapped registers Two UAPI system register IDs do not derive their values from the ARM system register encodings. This is because their values were accidentally swapped. As the IDs are API, they cannot be changed. Add WARNING notes to point them out. Suggested-by: Marc Zyngier Signed-off-by: Andrew Jones [maz: turned XXX into WARNING] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200120130825.28838-1-drjones@redhat.com --- Documentation/virt/kvm/api.txt | 9 +++++++++ arch/arm64/include/uapi/asm/kvm.h | 12 ++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index ebb37b34dcfc..3a0c819c3573 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -2196,6 +2196,15 @@ arm64 CCSIDR registers are demultiplexed by CSSELR value: arm64 system registers have the following id bit patterns: 0x6030 0000 0013 +WARNING: + Two system register IDs do not follow the specified pattern. These + are KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT, which map to + system registers CNTV_CVAL_EL0 and CNTVCT_EL0 respectively. These + two had their values accidentally swapped, which means TIMER_CVAL is + derived from the register encoding for CNTVCT_EL0 and TIMER_CNT is + derived from the register encoding for CNTV_CVAL_EL0. As this is + API, it must remain this way. 
+ arm64 firmware pseudo-registers have the following bit pattern: 0x6030 0000 0014 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 820e5751ada7..ba85bb23f060 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -220,10 +220,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) #define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) -/* EL0 Virtual Timer Registers */ +/* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) /* KVM-as-firmware specific pseudo-registers */ #define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) -- cgit From 6645d8542ef922486b733d415d2bec3b0622c27e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Jan 2020 12:47:06 +0000 Subject: arm64: KVM: Annotate guest entry/exit as a single function In an effort to clarify and simplify the annotations of assembly functions in the kernel new macros have been introduced replacing ENTRY and ENDPROC. There are separate annotations SYM_FUNC_ for normal C functions and SYM_CODE_ for other code. Currently __guest_enter and __guest_exit are annotated as standard functions but this is not entirely correct as the former doesn't do a normal return and the latter is not entered in a normal fashion. From the point of view of the hypervisor the guest entry/exit may be viewed as a single function which happens to have an eret in the middle of it so let's annotate it as such. 
Suggested-by: Mark Rutland Signed-off-by: Mark Brown Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200120124706.8681-1-broonie@kernel.org --- arch/arm64/kvm/hyp/entry.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e5cc8d66bf53..5b76a89939b1 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -44,7 +44,7 @@ * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); */ -ENTRY(__guest_enter) +SYM_FUNC_START(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros @@ -96,9 +96,8 @@ alternative_else_nop_endif // Do not touch any register after this! eret sb -ENDPROC(__guest_enter) -ENTRY(__guest_exit) +SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // x0: return code // x1: vcpu // x2-x29,lr: vcpu regs @@ -192,4 +191,4 @@ abort_guest_exit_end: msr spsr_el2, x4 orr x0, x0, x5 1: ret -ENDPROC(__guest_exit) +SYM_FUNC_END(__guest_enter) -- cgit From cf2d23e0bac9f6b5cd1cba8898f5f05ead40e530 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 21 Jan 2020 16:56:59 +1100 Subject: KVM: arm/arm64: Fix young bit from mmu notifier kvm_test_age_hva() is called upon mmu_notifier_test_young(), but the wrong address range has been passed to handle_hva_to_gpa(). With the wrong address range, no young bits will be checked in handle_hva_to_gpa(). It means zero is always returned from mmu_notifier_test_young(). This fixes the issue by passing the correct address range to the underlying function handle_hva_to_gpa(), so that the hardware young (access) bit will be visited.
Fixes: 35307b9a5f7e ("arm/arm64: KVM: Implement Stage-2 page aging") Signed-off-by: Gavin Shan Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200121055659.19560-1-gshan@redhat.com --- virt/kvm/arm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index a4fa81d75e84..8a9db95d1e42 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2144,7 +2144,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) if (!kvm->arch.pgd) return 0; trace_kvm_test_age_hva(hva); - return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); } void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) -- cgit From 018f22f95e8a6c3e27188b7317ef2c70a34cb2cd Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jan 2020 12:33:55 +0000 Subject: KVM: arm: Fix DFSR setting for non-LPAE aarch32 guests Beata reports that KVM_SET_VCPU_EVENTS doesn't inject the expected exception to a non-LPAE aarch32 guest. The host intends to inject DFSR.FS=0x14 "IMPLEMENTATION DEFINED fault (Lockdown fault)", but the guest receives DFSR.FS=0x04 "Fault on instruction cache maintenance". This fault is hooked by do_translation_fault() since ARMv6, which goes on to silently 'handle' the exception, and restart the faulting instruction. It turns out, when TTBCR.EAE is clear DFSR is split, and FS[4] has to shuffle up to DFSR[10]. As KVM only does this in one place, fix up the static values. 
We now get the expected: | Unhandled fault: lock abort (0x404) at 0x9c800f00 Fixes: 74a64a981662a ("KVM: arm/arm64: Unify 32bit fault injection") Reported-by: Beata Michalska Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200121123356.203000-2-james.morse@arm.com --- virt/kvm/arm/aarch32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 631d397ac81b..2da482ca7067 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -181,10 +181,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, /* Give the guest an IMPLEMENTATION DEFINED exception */ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) + if (is_lpae) { *fsr = 1 << 9 | 0x34; - else - *fsr = 0x14; + } else { + /* Surprise! DFSR's FS[4] lives in bit 10 */ + *fsr = BIT(10) | 0x4; /* 0x14 */ + } } void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) -- cgit From 21aecdbd7f3ab02c9b82597dc733ee759fb8b274 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jan 2020 12:33:56 +0000 Subject: KVM: arm: Make inject_abt32() inject an external abort instead KVM's inject_abt64() injects an external-abort into an aarch64 guest. The KVM_CAP_ARM_INJECT_EXT_DABT is intended to do exactly this, but for an aarch32 guest inject_abt32() injects an implementation-defined exception, 'Lockdown fault'. Change this to external abort. 
For non-LPAE we now get the documented: | Unhandled fault: external abort on non-linefetch (0x008) at 0x9c800f00 and for LPAE: | Unhandled fault: synchronous external abort (0x210) at 0x9c800f00 Fixes: 74a64a981662a ("KVM: arm/arm64: Unify 32bit fault injection") Reported-by: Beata Michalska Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200121123356.203000-3-james.morse@arm.com --- virt/kvm/arm/aarch32.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 2da482ca7067..0a356aa91aa1 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -15,6 +15,10 @@ #include #include +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + /* * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. */ @@ -182,10 +186,10 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, /* Give the guest an IMPLEMENTATION DEFINED exception */ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); if (is_lpae) { - *fsr = 1 << 9 | 0x34; + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; } else { - /* Surprise! DFSR's FS[4] lives in bit 10 */ - *fsr = BIT(10) | 0x4; /* 0x14 */ + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; } } -- cgit From 3837407c1aa1101ed5e214c7d6041e7a23335c6e Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 24 Jan 2020 15:25:32 +0100 Subject: KVM: arm64: pmu: Don't increment SW_INCR if PMCR.E is unset The specification says PMSWINC increments PMEVCNTR_EL1 by 1 if PMEVCNTR_EL0 is enabled and configured to count SW_INCR. For PMEVCNTR_EL0 to be enabled, we need both PMCNTENSET to be set for the corresponding event counter but we also need the PMCR.E bit to be set. 
Fixes: 7a0adc7064b8 ("arm64: KVM: Add access handler for PMSWINC register") Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Andrew Murray Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20200124142535.29386-2-eric.auger@redhat.com --- virt/kvm/arm/pmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8731dfeced8b..c3f8b059881e 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -486,6 +486,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) if (val == 0) return; + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { if (!(val & BIT(i))) -- cgit From 76c9fc56ddfdfeb0c9ff984d0d63b083e608fc92 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 24 Jan 2020 15:25:33 +0100 Subject: KVM: arm64: pmu: Don't mark a counter as chained if the odd one is disabled At the moment we update the chain bitmap on type setting. This does not take into account the enable state of the odd register. Let's make sure a counter is never considered as chained if the high counter is disabled. We recompute the chain state on enable/disable and type changes. Also let create_perf_event() use the chain bitmap and not use kvm_pmu_idx_has_chain_evtype(). 
Suggested-by: Marc Zyngier Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200124142535.29386-3-eric.auger@redhat.com --- virt/kvm/arm/pmu.c | 62 +++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index c3f8b059881e..9f605e0b8dd7 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -15,6 +15,8 @@ #include static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 @@ -75,6 +77,13 @@ static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) return pmc; } +static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + else + return pmc + 1; +} /** * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain @@ -294,15 +303,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* - * For high counters of chained events we must recreate the - * perf event with the long (64bit) attribute set. - */ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(i)) { - kvm_pmu_create_perf_event(vcpu, i); - continue; - } + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); /* At this point, pmc must be the canonical */ if (pmc->perf_event) { @@ -335,15 +338,9 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) pmc = &pmu->pmc[i]; - /* - * For high counters of chained events we must recreate the - * perf event with the long (64bit) attribute unset. 
- */ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(i)) { - kvm_pmu_create_perf_event(vcpu, i); - continue; - } + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); /* At this point, pmc must be the canonical */ if (pmc->perf_event) @@ -585,15 +582,14 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + if (kvm_pmu_pmc_is_chained(pmc)) { /** * The initial sample period (overflow count) of an event. For * chained counters we only support overflow interrupts on the * high counter. */ attr.sample_period = (-counter) & GENMASK(63, 0); - if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1)) - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, @@ -624,25 +620,33 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) * @select_idx: The number of selected counter * * Update the chained bitmap based on the event type written in the - * typer register. + * typer register and the enable state of the odd register. 
*/ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) { struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; + bool new_state, old_state; + + old_state = kvm_pmu_pmc_is_chained(pmc); + new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && + kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); - if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) { + if (old_state == new_state) + return; + + canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); + kvm_pmu_stop_counter(vcpu, canonical_pmc); + if (new_state) { /* * During promotion from !chained to chained we must ensure * the adjacent counter is stopped and its event destroyed */ - if (!kvm_pmu_pmc_is_chained(pmc)) - kvm_pmu_stop_counter(vcpu, pmc); - + kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - } else { - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + return; } + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); } /** -- cgit From aa76829171e98bd75a0cc00b6248eca269ac7f4f Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 24 Jan 2020 15:25:34 +0100 Subject: KVM: arm64: pmu: Fix chained SW_INCR counters At the moment a SW_INCR counter always overflows on 32-bit boundary, independently on whether the n+1th counter is programmed as CHAIN. Check whether the SW_INCR counter is a 64b counter and if so, implement the 64b logic. 
Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters") Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200124142535.29386-4-eric.auger@redhat.com --- virt/kvm/arm/pmu.c | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 9f605e0b8dd7..560db6282137 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -477,28 +477,45 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { + struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; - u64 type, enable, reg; - - if (val == 0) - return; if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; - enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + /* Weed out disabled counters */ + val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { + u64 type, reg; + if (!(val & BIT(i))) continue; - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) - & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) - && (enable & BIT(i))) { - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + + /* PMSWINC only applies to ... SW_INC! 
*/ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= ARMV8_PMU_EVTYPE_EVENT; + if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) + continue; + + /* increment this even SW_INC counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* no overflow on the low part */ + continue; + + if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { + /* increment the high counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (!reg) - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; + if (!reg) /* mark overflow on the high counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); + } else { + /* mark overflow on low counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } -- cgit From c01d6a18023b94fdd0cb7cf11bbfe769bf71653f Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 24 Jan 2020 15:25:35 +0100 Subject: KVM: arm64: pmu: Only handle supported event counters Let the code never use unsupported event counters. Change kvm_pmu_handle_pmcr() to only reset supported counters and kvm_pmu_vcpu_reset() to only stop supported counters. 
Other actions are filtered on the supported counters in kvm/sysregs.c Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200124142535.29386-5-eric.auger@redhat.com --- virt/kvm/arm/pmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 560db6282137..f0d0312c0a55 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -247,10 +247,11 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) */ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { - int i; + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + for_each_set_bit(i, &mask, 32) kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); @@ -527,10 +528,9 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - u64 mask; + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); int i; - mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); @@ -542,7 +542,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) + for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); } } -- cgit From 4a267aa707953a9a73d1f5dc7f894dd9024a92be Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 27 Jan 2020 10:36:52 +0000 Subject: KVM: arm64: Treat emulated TVAL TimerValue as a signed 32-bit integer According to the ARM ARM, registers CNT{P,V}_TVAL_EL0 have bits [63:32] RES0 [1]. When reading the register, the value is truncated to the least significant 32 bits [2], and on writes, TimerValue is treated as a signed 32-bit integer [1, 2]. 
When the guest behaves correctly and writes 32-bit values, treating TVAL as an unsigned 64 bit register works as expected. However, things start to break down when the guest writes larger values, because (u64)0x1_ffff_ffff = 8589934591. but (s32)0x1_ffff_ffff = -1, and the former will cause the timer interrupt to be asserted in the future, but the latter will cause it to be asserted now. Let's treat TVAL as a signed 32-bit register on writes, to match the behaviour described in the architecture, and the behaviour experimentally exhibited by the virtual timer on a non-vhe host. [1] Arm DDI 0487E.a, section D13.8.18 [2] Arm DDI 0487E.a, section D11.2.4 Signed-off-by: Alexandru Elisei [maz: replaced the read-side mask with lower_32_bits] Signed-off-by: Marc Zyngier Fixes: 8fa761624871 ("KVM: arm/arm64: arch_timer: Fix CNTP_TVAL calculation") Link: https://lore.kernel.org/r/20200127103652.2326-1-alexandru.elisei@arm.com --- virt/kvm/arm/arch_timer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f182b2380345..c6c2a9dde00c 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -805,6 +805,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; + val &= lower_32_bits(val); break; case TIMER_REG_CTL: @@ -850,7 +851,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val; + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; break; case TIMER_REG_CTL: -- cgit