diff options
Diffstat (limited to 'arch/arm64/kvm/vgic/vgic-v3.c')
| -rw-r--r-- | arch/arm64/kvm/vgic/vgic-v3.c | 1009 |
1 files changed, 1009 insertions, 0 deletions
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c new file mode 100644 index 000000000000..1d6dd1b545bd --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -0,0 +1,1009 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kstrtox.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/string_choices.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_asm.h> + +#include "vgic-mmio.h" +#include "vgic.h" + +static bool group0_trap; +static bool group1_trap; +static bool common_trap; +static bool dir_trap; +static bool gicv4_enable; + +void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, + struct ap_list_summary *als) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + cpuif->vgic_hcr = ICH_HCR_EL2_En; + + if (irqs_pending_outside_lrs(als)) + cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE; + if (irqs_active_outside_lrs(als)) + cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE; + if (irqs_outside_lrs(als)) + cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; + + if (!als->nr_sgi) + cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount; + + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ? + ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE; + cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ? + ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE; + + /* + * Dealing with EOImode=1 is a massive source of headache. Not + * only do we need to track that we have active interrupts + * outside of the LRs and force DIR to be trapped, we also + * need to deal with SPIs that can be deactivated on another + * CPU. + * + * On systems that do not implement TDIR, force the bit in the + * shadow state anyway to avoid IPI-ing on these poor sods. + * + * Note that we set the trap irrespective of EOIMode, as that + * can change behind our back without any warning... + */ + if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) || + irqs_active_outside_lrs(als) || + atomic_read(&vcpu->kvm->arch.vgic.active_spis)) + cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR; +} + +static bool lr_signals_eoi_mi(u64 lr_val) +{ + return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) && + !(lr_val & ICH_LR_HW); +} + +static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val) +{ + struct vgic_irq *irq; + bool is_v2_sgi = false; + bool deactivated; + u32 intid; + + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + intid = val & ICH_LR_VIRTUAL_ID_MASK; + } else { + intid = val & GICH_LR_VIRTUALID; + is_v2_sgi = vgic_irq_is_sgi(intid); + } + + irq = vgic_get_vcpu_irq(vcpu, intid); + if (!irq) /* An LPI could have been unmapped. */ + return; + + scoped_guard(raw_spinlock, &irq->irq_lock) { + /* Always preserve the active bit for !LPIs, note deactivation */ + if (irq->intid >= VGIC_MIN_LPI) + val &= ~ICH_LR_ACTIVE_BIT; + deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); + irq->active = !!(val & ICH_LR_ACTIVE_BIT); + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + (val & ICH_LR_PENDING_BIT)) + irq->pending_latch = true; + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; + + if (is_v2_sgi) { + u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val); + + if (irq->active) + irq->active_source = cpuid; + + if (val & ICH_LR_PENDING_BIT) + irq->source |= BIT(cpuid); + } + + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); + + irq->on_lr = false; + } + + /* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */ + if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) { + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis); + } + + vgic_put_irq(vcpu->kvm, irq); +} + +static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq); + +static void vgic_v3_deactivate_phys(u32 intid) +{ + if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) + gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI); + else + gic_write_dir(intid); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; + u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr); + struct vgic_irq *irq; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + for (int lr = 0; lr < cpuif->used_lrs; lr++) + vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]); + + /* + * EOIMode=0: use EOIcount to emulate deactivation. We are + * guaranteed to deactivate in reverse order of the activation, so + * just pick one active interrupt after the other in the ap_list, + * and replay the deactivation as if the CPU was doing it. We also + * rely on priority drop to have taken place, and the list to be + * sorted by priority. + */ + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + u64 lr; + + /* + * I would have loved to write this using a scoped_guard(), + * but using 'continue' here is a total train wreck. + */ + if (!eoicount) { + break; + } else { + guard(raw_spinlock)(&irq->irq_lock); + + if (!(likely(vgic_target_oracle(irq) == vcpu) && + irq->active)) + continue; + + lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT; + } + + if (lr & ICH_LR_HW) + vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + + vgic_v3_fold_lr(vcpu, lr); + eoicount--; + } + + cpuif->used_lrs = 0; +} + +void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + struct kvm_vcpu *target_vcpu = NULL; + bool mmio = false, is_v2_sgi; + struct vgic_irq *irq; + unsigned long flags; + u64 lr = 0; + u8 cpuid; + + /* Snapshot CPUID, and remove it from the INTID */ + cpuid = FIELD_GET(GENMASK_ULL(12, 10), val); + val &= ~GENMASK_ULL(12, 10); + + is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 && + val < VGIC_NR_SGIS); + + /* + * We only deal with DIR when EOIMode==1, and only for SGI, + * PPI or SPI. + */ + if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) || + val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS) + return; + + /* Make sure we're in the same context as LR handling */ + local_irq_save(flags); + + irq = vgic_get_vcpu_irq(vcpu, val); + if (WARN_ON_ONCE(!irq)) + goto out; + + /* + * EOIMode=1: we must rely on traps to handle deactivate of + * overflowing interrupts, as there is no ordering guarantee and + * EOIcount isn't being incremented. Priority drop will have taken + * place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs. + * + * Three possibities: + * + * - The irq is not queued on any CPU, and there is nothing to + * do, + * + * - Or the irq is in an LR, meaning that its state is not + * directly observable. Treat it bluntly by making it as if + * this was a write to GICD_ICACTIVER, which will force an + * exit on all vcpus. If it hurts, don't do that. + * + * - Or the irq is active, but not in an LR, and we can + * directly deactivate it by building a pseudo-LR, fold it, + * and queue a request to prune the resulting ap_list, + * + * Special care must be taken to match the source CPUID when + * deactivating a GICv2 SGI. + */ + scoped_guard(raw_spinlock, &irq->irq_lock) { + target_vcpu = irq->vcpu; + + /* Not on any ap_list? */ + if (!target_vcpu) + goto put; + + /* + * Urgh. We're deactivating something that we cannot + * observe yet... Big hammer time. + */ + if (irq->on_lr) { + mmio = true; + goto put; + } + + /* GICv2 SGI: check that the cpuid matches */ + if (is_v2_sgi && irq->active_source != cpuid) { + target_vcpu = NULL; + goto put; + } + + /* (with a Dalek voice) DEACTIVATE!!!! */ + lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT; + } + + if (lr & ICH_LR_HW) + vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + + vgic_v3_fold_lr(vcpu, lr); + +put: + vgic_put_irq(vcpu->kvm, irq); + +out: + local_irq_restore(flags); + + if (mmio) + vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32)); + + /* Force the ap_list to be pruned */ + if (target_vcpu) + kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu); +} + +/* Requires the irq to be locked already */ +static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u64 val = irq->intid; + bool allow_pending = true, is_v2_sgi; + + WARN_ON(irq->on_lr); + + is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2); + + if (irq->active) { + val |= ICH_LR_ACTIVE_BIT; + if (is_v2_sgi) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= ICH_LR_EOI; + } + } + + if (irq->hw && !vgic_irq_needs_resampling(irq)) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= ICH_LR_PENDING_BIT; + + if (is_v2_sgi) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return 0; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + if (irq->source & ~BIT(src - 1)) + val |= ICH_LR_EOI; + } + } + + if (irq->group) + val |= ICH_LR_GROUP; + + val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; + + return val; +} + +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u64 val = vgic_v3_compute_lr(vcpu, irq); + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; + + if (val & ICH_LR_PENDING_BIT) { + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2) { + u32 src = ffs(irq->source); + + irq->source &= ~BIT(src - 1); + if (irq->source) + irq->pending_latch = true; + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v3_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) + irq->line_level = false; + + irq->on_lr = true; +} + +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0; +} + +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & + ICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & + ICH_VMCR_FIQ_EN_MASK; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcr = ICH_VMCR_FIQ_EN_MASK; + } + + vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> + ICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> + ICH_VMCR_FIQ_EN_SHIFT; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcrp->fiqen = 1; + vmcrp->ackctl = 0; + } + + vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; +} + +#define INITIAL_PENDBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) + +void vgic_v3_reset(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vgic_v3->vgic_vmcr = 0; + + /* + * If we are emulating a GICv3, we do it in an non-GICv2-compatible + * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. + * This goes with the spec allowing the value to be RAO/WI. + */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); + vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; + } else { + vgic_v3->vgic_sre = 0; + } + + vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits, + kvm_vgic_global_state.ich_vtr_el2); + vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits, + kvm_vgic_global_state.ich_vtr_el2) + 1; +} + +void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (!vgic_is_v3(vcpu->kvm)) + return; + + /* Hide GICv3 sysreg if necessary */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || + !irqchip_in_kernel(vcpu->kvm)) + vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | + ICH_HCR_EL2_TC); +} + +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) +{ + struct kvm_vcpu *vcpu; + int byte_offset, bit_nr; + gpa_t pendbase, ptr; + bool status; + u8 val; + int ret; + unsigned long flags; + +retry: + vcpu = irq->target_vcpu; + if (!vcpu) + return 0; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + + status = val & (1 << bit_nr); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->target_vcpu != vcpu) { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + goto retry; + } + irq->pending_latch = status; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + if (status) { + /* clear consumed data */ + val &= ~(1 << bit_nr); + ret = vgic_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/* + * The deactivation of the doorbell interrupt will trigger the + * unmapping of the associated vPE. + */ +static void unmap_all_vpes(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int i; + + for (i = 0; i < dist->its_vm.nr_vpes; i++) + free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i)); +} + +static void map_all_vpes(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int i; + + for (i = 0; i < dist->its_vm.nr_vpes; i++) + WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i), + dist->its_vm.vpes[i]->irq)); +} + +/* + * vgic_v3_save_pending_tables - Save the pending tables into guest RAM + * kvm lock and all vcpu lock must be held + */ +int vgic_v3_save_pending_tables(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; + bool vlpi_avail = false; + unsigned long index; + int ret = 0; + u8 val; + + if (unlikely(!vgic_initialized(kvm))) + return -ENXIO; + + /* + * A preparation for getting any VLPI states. + * The above vgic initialized check also ensures that the allocation + * and enabling of the doorbells have already been done. + */ + if (kvm_vgic_global_state.has_gicv4_1) { + unmap_all_vpes(kvm); + vlpi_avail = true; + } + + xa_for_each(&dist->lpi_xa, index, irq) { + int byte_offset, bit_nr; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr; + bool is_pending; + bool stored; + + vcpu = irq->target_vcpu; + if (!vcpu) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + if (ptr != last_ptr) { + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + goto out; + last_ptr = ptr; + } + + stored = val & (1U << bit_nr); + + is_pending = irq->pending_latch; + + if (irq->hw && vlpi_avail) + vgic_v4_get_vlpi_state(irq, &is_pending); + + if (stored == is_pending) + continue; + + if (is_pending) + val |= 1 << bit_nr; + else + val &= ~(1 << bit_nr); + + ret = vgic_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + goto out; + } + +out: + if (vlpi_avail) + map_all_vpes(kvm); + + return ret; +} + +/** + * vgic_v3_rdist_overlap - check if a region overlaps with any + * existing redistributor region + * + * @kvm: kvm handle + * @base: base of the region + * @size: size of region + * + * Return: true if there is an overlap + */ +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if ((base + size > rdreg->base) && + (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))) + return true; + } + return false; +} + +/* + * Check for overlapping regions and for regions crossing the end of memory + * for base addresses which have already been set. + */ +bool vgic_v3_check_base(struct kvm *kvm) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) + return false; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + size_t sz = vgic_v3_rd_region_size(kvm, rdreg); + + if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF, + rdreg->base, SZ_64K, sz)) + return false; + } + + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base)) + return true; + + return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base, + KVM_VGIC_V3_DIST_SIZE); +} + +/** + * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one + * which has free space to put a new rdist region. + * + * @rd_regions: redistributor region list head + * + * A redistributor regions maps n redistributors, n = region size / (2 x 64kB). + * Stride between redistributors is 0 and regions are filled in the index order. + * + * Return: the redist region handle, if any, that has space to map a new rdist + * region. + */ +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions) +{ + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (!vgic_v3_redist_region_full(rdreg)) + return rdreg; + } + return NULL; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index) +{ + struct list_head *rd_regions = &kvm->arch.vgic.rd_regions; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (rdreg->index == index) + return rdreg; + } + return NULL; +} + + +int vgic_v3_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + unsigned long c; + + kvm_for_each_vcpu(c, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { + kvm_debug("vcpu %ld redistributor base not set\n", c); + return -ENXIO; + } + } + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { + kvm_debug("Need to set vgic distributor addresses first\n"); + return -ENXIO; + } + + if (!vgic_v3_check_base(kvm)) { + kvm_debug("VGIC redist and dist frames overlap\n"); + return -EINVAL; + } + + /* + * For a VGICv3 we require the userland to explicitly initialize + * the VGIC before we need to use it. + */ + if (!vgic_initialized(kvm)) { + return -EBUSY; + } + + if (kvm_vgic_global_state.has_gicv4_1) + vgic_v4_configure_vsgis(kvm); + + return 0; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat); + +static int __init early_group0_trap_cfg(char *buf) +{ + return kstrtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + +static int __init early_group1_trap_cfg(char *buf) +{ + return kstrtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + +static int __init early_common_trap_cfg(char *buf) +{ + return kstrtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + +static int __init early_gicv4_enable(char *buf) +{ + return kstrtobool(buf, &gicv4_enable); +} +early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); + +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return (is_kernel_in_hyp_mode() && + is_midr_in_range_list(broken_seis) && + (read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS)); +} + +void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + u32 insn, oinsn, rd; + u64 hcr = 0; + + if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + group0_trap = true; + group1_trap = true; + } + + if (vgic_v3_broken_seis()) { + /* We know that these machines have ICH_HCR_EL2.TDIR */ + group0_trap = true; + group1_trap = true; + dir_trap = true; + } + + if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR)) + common_trap = true; + + if (group0_trap) + hcr |= ICH_HCR_EL2_TALL0; + if (group1_trap) + hcr |= ICH_HCR_EL2_TALL1; + if (common_trap) + hcr |= ICH_HCR_EL2_TC; + if (dir_trap) + hcr |= ICH_HCR_EL2_TDIR; + + /* Compute target register */ + oinsn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); + + /* movz rd, #(val & 0xffff) */ + insn = aarch64_insn_gen_movewide(rd, + (u16)hcr, + 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr = cpu_to_le32(insn); +} + +/** + * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv3 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v3_probe(const struct gic_kvm_info *info) +{ + u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config); + bool has_v2; + u64 traps; + int ret; + + has_v2 = ich_vtr_el2 >> 63; + ich_vtr_el2 = (u32)ich_vtr_el2; + + /* + * The ListRegs field is 5 bits, but there is an architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; + kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; + + /* GICv4 support? */ + if (info->has_v4) { + kvm_vgic_global_state.has_gicv4 = gicv4_enable; + kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable; + kvm_info("GICv4%s support %s\n", + kvm_vgic_global_state.has_gicv4_1 ? ".1" : "", + str_enabled_disabled(gicv4_enable)); + } + + kvm_vgic_global_state.vcpu_base = 0; + + if (!info->vcpu.start) { + kvm_info("GICv3: no GICV resource entry\n"); + } else if (!has_v2) { + pr_warn(FW_BUG "CPU interface incapable of MMIO access\n"); + } else if (!PAGE_ALIGNED(info->vcpu.start)) { + pr_warn("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)info->vcpu.start); + } else if (kvm_get_mode() != KVM_MODE_PROTECTED) { + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.can_emulate_gicv2 = true; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); + return ret; + } + kvm_info("vgic-v2@%llx\n", info->vcpu.start); + } + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3 KVM device.\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); + return ret; + } + + if (kvm_vgic_global_state.vcpu_base == 0) + kvm_info("disabling GICv2 emulation\n"); + + /* + * Flip the static branch if the HW supports v2, even if we're + * not using it (such as in protected mode). + */ + if (has_v2) + static_branch_enable(&vgic_v3_has_v2_compat); + + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; + } + + traps = vgic_ich_hcr_trap_bits(); + if (traps) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", + (traps & ICH_HCR_EL2_TALL0) ? "G0" : "", + (traps & ICH_HCR_EL2_TALL1) ? "G1" : "", + (traps & ICH_HCR_EL2_TC) ? "C" : "", + (traps & ICH_HCR_EL2_TDIR) ? "D" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } + + kvm_vgic_global_state.vctrl_base = NULL; + kvm_vgic_global_state.type = VGIC_V3; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; + + return 0; +} + +void vgic_v3_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + /* If the vgic is nested, perform the full state loading */ + if (vgic_state_is_nested(vcpu)) { + vgic_v3_load_nested(vcpu); + return; + } + + if (likely(!is_protected_kvm_enabled())) + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); + + if (has_vhe()) + __vgic_v3_activate_traps(cpu_if); + + WARN_ON(vgic_v4_load(vcpu)); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + if (vgic_state_is_nested(vcpu)) { + vgic_v3_put_nested(vcpu); + return; + } + + if (likely(!is_protected_kvm_enabled())) + kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); + WARN_ON(vgic_v4_put(vcpu)); + + if (has_vhe()) + __vgic_v3_deactivate_traps(cpu_if); +} |
