From f39d16cbabf9f939745a3850a33760910d22ef35 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 19 Oct 2016 12:40:17 +0200 Subject: KVM: arm/arm64: Guard kvm_vgic_map_is_active against !vgic_initialized If the vgic is not initialized, don't try to grab its spinlocks or traverse its data structures. This is important because we soon have to start considering the active state of a virtual interrupts when doing vcpu_load, which may happen early on before the vgic is initialized. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index fed717e07938..e1f7dbcfece0 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -777,6 +777,9 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); bool map_is_active; + if (!vgic_initialized(vcpu->kvm)) + return false; + spin_lock(&irq->irq_lock); map_is_active = irq->hw && irq->active; spin_unlock(&irq->irq_lock); -- cgit From 006df0f34930e18d0aa52f05705bdfe1fc565943 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 16 Oct 2016 22:19:11 +0200 Subject: KVM: arm/arm64: Support calling vgic_update_irq_pending from irq context We are about to optimize our timer handling logic which involves injecting irqs to the vgic directly from the irq handler. Unfortunately, the injection path can take any AP list lock and irq lock and we must therefore make sure to use spin_lock_irqsave where ever interrupts are enabled and we are taking any of those locks, to avoid deadlocking between process context and the ISR. This changes a lot of the VGIC code, but the good news are that the changes are mostly mechanical. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 17 +++++++----- virt/kvm/arm/vgic/vgic-mmio-v2.c | 22 +++++++++------ virt/kvm/arm/vgic/vgic-mmio-v3.c | 17 +++++++----- virt/kvm/arm/vgic/vgic-mmio.c | 44 ++++++++++++++++++------------ virt/kvm/arm/vgic/vgic-v2.c | 5 ++-- virt/kvm/arm/vgic/vgic-v3.c | 12 ++++---- virt/kvm/arm/vgic/vgic.c | 59 ++++++++++++++++++++++++---------------- virt/kvm/arm/vgic/vgic.h | 3 +- 8 files changed, 107 insertions(+), 72 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index f51c1e1b3f70..9f5e34767628 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -278,6 +278,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); u8 prop; int ret; + unsigned long flags; ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, &prop, 1); @@ -285,15 +286,15 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, if (ret) return ret; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { irq->priority = LPI_PROP_PRIORITY(prop); irq->enabled = LPI_PROP_ENABLE_BIT(prop); - vgic_queue_irq_unlock(kvm, irq); + vgic_queue_irq_unlock(kvm, irq, flags); } else { - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } return 0; @@ -393,6 +394,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) int ret = 0; u32 *intids; int nr_irqs, i; + unsigned long flags; nr_irqs = vgic_copy_lpi_list(vcpu, &intids); if (nr_irqs < 0) @@ -420,9 +422,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -515,6 +517,7 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, { struct kvm_vcpu *vcpu; struct its_ite *ite; + unsigned long flags; if (!its->enabled) return -EBUSY; @@ -530,9 +533,9 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, if (!vcpu->arch.vgic_cpu.lpis_enabled) return -EBUSY; - spin_lock(&ite->irq->irq_lock); + spin_lock_irqsave(&ite->irq->irq_lock, flags); ite->irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, ite->irq); + vgic_queue_irq_unlock(kvm, ite->irq, flags); return 0; } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index b3d4a10f09a1..e21e2f49b005 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -74,6 +74,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, int mode = (val >> 24) & 0x03; int c; struct kvm_vcpu *vcpu; + unsigned long flags; switch (mode) { case 0x0: /* as specified by targets */ @@ -97,11 +98,11 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; irq->source |= 1U << source_vcpu->vcpu_id; - vgic_queue_irq_unlock(source_vcpu->kvm, irq); + vgic_queue_irq_unlock(source_vcpu->kvm, irq, flags); vgic_put_irq(source_vcpu->kvm, irq); } } @@ -131,6 +132,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 8); u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0); int i; + unsigned long flags; /* GICD_ITARGETSR[0-7] are read-only */ if (intid < VGIC_NR_PRIVATE_IRQS) @@ -140,13 +142,13 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); int target; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->targets = (val >> (i * 8)) & cpu_mask; target = irq->targets ? __ffs(irq->targets) : 0; irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -174,17 +176,18 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, { u32 intid = addr & 0x0f; int i; + unsigned long flags; for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->source &= ~((val >> (i * 8)) & 0xff); if (!irq->source) irq->pending_latch = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -195,19 +198,20 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, { u32 intid = addr & 0x0f; int i; + unsigned long flags; for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->source |= (val >> (i * 8)) & 0xff; if (irq->source) { irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 408ef06638fc..83786108829e 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -129,6 +129,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, { int intid = VGIC_ADDR_TO_INTID(addr, 64); struct vgic_irq *irq; + unsigned long flags; /* The upper word is WI for us since we don't implement Aff3. */ if (addr & 4) @@ -139,13 +140,13 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, if (!irq) return; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* We only care about and preserve Aff0, Aff1 and Aff2. */ irq->mpidr = val & GENMASK(23, 0); irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -241,11 +242,12 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i, &val)) { /* * pending_latch is set irrespective of irq type @@ -253,10 +255,10 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, * restore irq config before pending info. */ irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); } else { irq->pending_latch = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } vgic_put_irq(vcpu->kvm, irq); @@ -799,6 +801,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) int sgi, c; int vcpu_id = vcpu->vcpu_id; bool broadcast; + unsigned long flags; sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); @@ -837,10 +840,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index c1e4bdd66131..deb51ee16a3d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -69,13 +69,14 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -87,15 +88,16 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -126,14 +128,15 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -144,15 +147,16 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; + unsigned long flags; for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = false; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -181,7 +185,8 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { struct kvm_vcpu *requester_vcpu; - spin_lock(&irq->irq_lock); + unsigned long flags; + spin_lock_irqsave(&irq->irq_lock, flags); /* * The vcpu parameter here can mean multiple things depending on how @@ -216,9 +221,9 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->active = new_active_state; if (new_active_state) - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); } /* @@ -352,14 +357,15 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 8); int i; + unsigned long flags; for (i = 0; i < len; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -390,6 +396,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 2); int i; + unsigned long flags; for (i = 0; i < len * 4; i++) { struct vgic_irq *irq; @@ -404,14 +411,14 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, continue; irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i * 2 + 1, &val)) irq->config = VGIC_CONFIG_EDGE; else irq->config = VGIC_CONFIG_LEVEL; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } } @@ -443,6 +450,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, { int i; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + unsigned long flags; for (i = 0; i < 32; i++) { struct vgic_irq *irq; @@ -459,12 +467,12 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, * restore irq config before line level. */ new_level = !!(val & (1U << i)); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->line_level = new_level; if (new_level) - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index e4187e52bb26..80897102da26 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -62,6 +62,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; int lr; + unsigned long flags; cpuif->vgic_hcr &= ~GICH_HCR_UIE; @@ -77,7 +78,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq = vgic_get_irq(vcpu->kvm, vcpu, intid); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* Always preserve the active bit */ irq->active = !!(val & GICH_LR_ACTIVE_BIT); @@ -104,7 +105,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 96ea597db0e7..863351c090d8 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -44,6 +44,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; u32 model = vcpu->kvm->arch.vgic.vgic_model; int lr; + unsigned long flags; cpuif->vgic_hcr &= ~ICH_HCR_UIE; @@ -66,7 +67,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) if (!irq) /* An LPI could have been unmapped. */ continue; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); /* Always preserve the active bit */ irq->active = !!(val & ICH_LR_ACTIVE_BIT); @@ -94,7 +95,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -278,6 +279,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) bool status; u8 val; int ret; + unsigned long flags; retry: vcpu = irq->target_vcpu; @@ -296,13 +298,13 @@ retry: status = val & (1 << bit_nr); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (irq->target_vcpu != vcpu) { - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); goto retry; } irq->pending_latch = status; - vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); if (status) { /* clear consumed data */ diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e1f7dbcfece0..e54ef2fdf73d 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -53,6 +53,10 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * vcpuX->vcpu_id < vcpuY->vcpu_id: * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); + * + * Since the VGIC must support injecting virtual interrupts from ISRs, we have + * to use the spin_lock_irqsave/spin_unlock_irqrestore versions of outer + * spinlocks for any lock that may be taken while injecting an interrupt. */ /* @@ -261,7 +265,8 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne * Needs to be entered with the IRQ lock already held, but will return * with all locks dropped. */ -bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq) +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) { struct kvm_vcpu *vcpu; @@ -279,7 +284,7 @@ retry: * not need to be inserted into an ap_list and there is also * no more work for us to do. */ - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); /* * We have to kick the VCPU here, because we could be @@ -301,11 +306,11 @@ retry: * We must unlock the irq lock to take the ap_list_lock where * we are going to insert this new pending interrupt. */ - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); /* someone can do stuff here, which we re-check below */ - spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); spin_lock(&irq->irq_lock); /* @@ -322,9 +327,9 @@ retry: if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { spin_unlock(&irq->irq_lock); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); goto retry; } @@ -337,7 +342,7 @@ retry: irq->vcpu = vcpu; spin_unlock(&irq->irq_lock); - spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); @@ -367,6 +372,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, { struct kvm_vcpu *vcpu; struct vgic_irq *irq; + unsigned long flags; int ret; trace_vgic_update_irq_pending(cpuid, intid, level); @@ -383,11 +389,11 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, if (!irq) return -EINVAL; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(kvm, irq); return 0; } @@ -397,7 +403,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, else irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, irq); + vgic_queue_irq_unlock(kvm, irq, flags); vgic_put_irq(kvm, irq); return 0; @@ -406,15 +412,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + unsigned long flags; BUG_ON(!irq); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->hw = true; irq->hwintid = phys_irq; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return 0; @@ -423,6 +430,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) { struct vgic_irq *irq; + unsigned long flags; if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; @@ -430,12 +438,12 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); BUG_ON(!irq); - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->hw = false; irq->hwintid = 0; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return 0; @@ -486,9 +494,10 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq, *tmp; + unsigned long flags; retry: - spin_lock(&vgic_cpu->ap_list_lock); + spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; @@ -528,7 +537,7 @@ retry: /* This interrupt looks like it has to be migrated. */ spin_unlock(&irq->irq_lock); - spin_unlock(&vgic_cpu->ap_list_lock); + spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); /* * Ensure locking order by always locking the smallest @@ -542,7 +551,7 @@ retry: vcpuB = vcpu; } - spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); + spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, SINGLE_DEPTH_NESTING); spin_lock(&irq->irq_lock); @@ -566,11 +575,11 @@ retry: spin_unlock(&irq->irq_lock); spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); - spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); + spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags); goto retry; } - spin_unlock(&vgic_cpu->ap_list_lock); + spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); } static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) @@ -703,6 +712,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); @@ -735,11 +746,12 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; bool pending = false; + unsigned long flags; if (!vcpu->kvm->arch.vgic.enabled) return false; - spin_lock(&vgic_cpu->ap_list_lock); + spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); @@ -750,7 +762,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) break; } - spin_unlock(&vgic_cpu->ap_list_lock); + spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); return pending; } @@ -776,13 +788,14 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); bool map_is_active; + unsigned long flags; if (!vgic_initialized(vcpu->kvm)) return false; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); return map_is_active; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index bf9ceab67c77..4f8aecb07ae6 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -140,7 +140,8 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); -bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); void vgic_kick_vcpus(struct kvm *kvm); int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, -- cgit From 8409a06f2a2c0baeb6e6ff020b2c5a4592b3078d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 17 Jun 2017 01:09:19 -0700 Subject: KVM: arm/arm64: Make timer_arm and timer_disarm helpers more generic We are about to add an additional soft timer to the arch timer state for a VCPU and would like to be able to reuse the functions to program and cancel a timer, so we make them slightly more generic and rename to make it more clear that these functions work on soft timers and not the hardware resource that this code is managing. The armed flag on the timer state is only used to assert a condition, and we don't rely on this assertion in any meaningful way, so we can simply get rid of this flack and slightly reduce complexity. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 8e89d63005c7..223230191195 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -56,26 +56,16 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } -static bool timer_is_armed(struct arch_timer_cpu *timer) +static void soft_timer_start(struct hrtimer *hrt, u64 ns) { - return timer->armed; -} - -/* timer_arm: as in "arm the timer", not as in ARM the company */ -static void timer_arm(struct arch_timer_cpu *timer, u64 ns) -{ - timer->armed = true; - hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), + hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), HRTIMER_MODE_ABS); } -static void timer_disarm(struct arch_timer_cpu *timer) +static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work) { - if (timer_is_armed(timer)) { - hrtimer_cancel(&timer->timer); - cancel_work_sync(&timer->expired); - timer->armed = false; - } + hrtimer_cancel(hrt); + cancel_work_sync(work); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -271,7 +261,7 @@ static void kvm_timer_emulate(struct kvm_vcpu *vcpu, return; /* The timer has not yet expired, schedule a background timer */ - timer_arm(timer, kvm_timer_compute_delta(timer_ctx)); + soft_timer_start(&timer->timer, kvm_timer_compute_delta(timer_ctx)); } /* @@ -285,8 +275,6 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - BUG_ON(timer_is_armed(timer)); - /* * No need to schedule a background timer if any guest timer has * already expired, because kvm_vcpu_block will return before putting @@ -306,13 +294,14 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) * The guest timers have not yet expired, schedule a background timer. * Set the earliest expiration time among the guest timers. */ - timer_arm(timer, kvm_timer_earliest_exp(vcpu)); + soft_timer_start(&timer->timer, kvm_timer_earliest_exp(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer_disarm(timer); + + soft_timer_cancel(&timer->timer, &timer->expired); } static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) @@ -448,7 +437,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) * This is to cancel the background timer for the physical timer * emulation if it is set. */ - timer_disarm(timer); + soft_timer_cancel(&timer->timer, &timer->expired); /* * The guest could have modified the timer registers or the timer @@ -615,7 +604,7 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - timer_disarm(timer); + soft_timer_cancel(&timer->timer, &timer->expired); kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } -- cgit From 14d61fa98f03cb01f3aea7e3069fdf460caf5587 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 17 Jun 2017 07:33:02 -0700 Subject: KVM: arm/arm64: Rename soft timer to bg_timer As we are about to introduce a separate hrtimer for the physical timer, call this timer bg_timer, because we refer to this timer as the background timer in the code and comments elsewhere. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 223230191195..8ec392850e69 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -148,13 +148,13 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) return min(min_virt, min_phys); } -static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) +static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; struct kvm_vcpu *vcpu; u64 ns; - timer = container_of(hrt, struct arch_timer_cpu, timer); + timer = container_of(hrt, struct arch_timer_cpu, bg_timer); vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); /* @@ -261,7 +261,7 @@ static void kvm_timer_emulate(struct kvm_vcpu *vcpu, return; /* The timer has not yet expired, schedule a background timer */ - soft_timer_start(&timer->timer, kvm_timer_compute_delta(timer_ctx)); + soft_timer_start(&timer->bg_timer, kvm_timer_compute_delta(timer_ctx)); } /* @@ -294,14 +294,14 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) * The guest timers have not yet expired, schedule a background timer. * Set the earliest expiration time among the guest timers. */ - soft_timer_start(&timer->timer, kvm_timer_earliest_exp(vcpu)); + soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - soft_timer_cancel(&timer->timer, &timer->expired); + soft_timer_cancel(&timer->bg_timer, &timer->expired); } static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) @@ -437,7 +437,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) * This is to cancel the background timer for the physical timer * emulation if it is set. */ - soft_timer_cancel(&timer->timer, &timer->expired); + soft_timer_cancel(&timer->bg_timer, &timer->expired); /* * The guest could have modified the timer registers or the timer @@ -494,8 +494,8 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vcpu_ptimer(vcpu)->cntvoff = 0; INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); - hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - timer->timer.function = kvm_timer_expire; + hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + timer->bg_timer.function = kvm_bg_timer_expire; vtimer->irq.irq = default_vtimer_irq.irq; ptimer->irq.irq = default_ptimer_irq.irq; @@ -604,7 +604,7 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - soft_timer_cancel(&timer->timer, &timer->expired); + soft_timer_cancel(&timer->bg_timer, &timer->expired); kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } -- cgit From ee9bb9a1e3c6e40874c1611ac24b76c87d2cba7b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 16 Oct 2016 20:24:30 +0200 Subject: KVM: arm/arm64: Move timer/vgic flush/sync under disabled irq As we are about to play tricks with the timer to be more lazy in saving and restoring state, we need to move the timer sync and flush functions under a disabled irq section and since we have to flush the vgic state after the timer and PMU state, we do the whole flush/sync sequence with disabled irqs. The only downside is a slightly longer delay before being able to process hardware interrupts and run softirqs. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/arm.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index b9f68e4add71..27db222a0c8d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -654,11 +654,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_flush_hwstate(vcpu); + local_irq_disable(); + kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); - local_irq_disable(); - /* * If we have a singal pending, or need to notify a userspace * irqchip about timer or PMU level changes, then we exit (and @@ -683,10 +683,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; - local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); + local_irq_enable(); preempt_enable(); continue; } @@ -709,6 +709,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_arm_clear_debug(vcpu); + /* + * We must sync the PMU and timer state before the vgic state so + * that the vgic can properly sample the updated state of the + * interrupt line. + */ + kvm_pmu_sync_hwstate(vcpu); + kvm_timer_sync_hwstate(vcpu); + + kvm_vgic_sync_hwstate(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -732,16 +742,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); - /* - * We must sync the PMU and timer state before the vgic state so - * that the vgic can properly sample the updated state of the - * interrupt line. - */ - kvm_pmu_sync_hwstate(vcpu); - kvm_timer_sync_hwstate(vcpu); - - kvm_vgic_sync_hwstate(vcpu); - preempt_enable(); ret = handle_exit(vcpu, run, ret); -- cgit From f2a2129e0ac8d8fa79c3f85425c36f6e3368f022 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 18 Jun 2017 00:32:08 -0700 Subject: KVM: arm/arm64: Use separate timer for phys timer emulation We were using the same hrtimer for emulating the physical timer and for making sure a blocking VCPU thread would be eventually woken up. That worked fine in the previous arch timer design, but as we are about to actually use the soft timer expire function for the physical timer emulation, change the logic to use a dedicated hrtimer. This has the added benefit of not having to cancel any work in the sync path, which in turn allows us to run the flush and sync with IRQs disabled. Note that the hrtimer used to program the host kernel's timer to generate an exit from the guest when the emulated physical timer fires never has to inject any work, and to share the soft_timer_cancel() function with the bg_timer, we change the function to only cancel any pending work if the pointer to the work struct is not null. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 8ec392850e69..4ad853737c34 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -65,7 +65,8 @@ static void soft_timer_start(struct hrtimer *hrt, u64 ns) static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work) { hrtimer_cancel(hrt); - cancel_work_sync(work); + if (work) + cancel_work_sync(work); } static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) @@ -172,6 +173,12 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) +{ + WARN(1, "Timer only used to ensure guest exit - unexpected event."); + return HRTIMER_NORESTART; +} + bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { u64 cval, now; @@ -249,7 +256,7 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) } /* Schedule the background timer for the emulated timer. */ -static void kvm_timer_emulate(struct kvm_vcpu *vcpu, +static void phys_timer_emulate(struct kvm_vcpu *vcpu, struct arch_timer_context *timer_ctx) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -261,7 +268,7 @@ static void kvm_timer_emulate(struct kvm_vcpu *vcpu, return; /* The timer has not yet expired, schedule a background timer */ - soft_timer_start(&timer->bg_timer, kvm_timer_compute_delta(timer_ctx)); + soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx)); } /* @@ -414,7 +421,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) kvm_timer_update_state(vcpu); /* Set the background timer for the physical timer emulation. */ - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); + phys_timer_emulate(vcpu, vcpu_ptimer(vcpu)); if (unlikely(!irqchip_in_kernel(vcpu->kvm))) kvm_timer_flush_hwstate_user(vcpu); @@ -437,7 +444,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) * This is to cancel the background timer for the physical timer * emulation if it is set. */ - soft_timer_cancel(&timer->bg_timer, &timer->expired); + soft_timer_cancel(&timer->phys_timer, NULL); /* * The guest could have modified the timer registers or the timer @@ -497,6 +504,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->bg_timer.function = kvm_bg_timer_expire; + hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + timer->phys_timer.function = kvm_phys_timer_expire; + vtimer->irq.irq = default_vtimer_irq.irq; ptimer->irq.irq = default_ptimer_irq.irq; } @@ -605,6 +615,7 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); soft_timer_cancel(&timer->bg_timer, &timer->expired); + soft_timer_cancel(&timer->phys_timer, NULL); kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } -- cgit From 688c50aa72f64ca21767486e5eef876ec23e418c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 4 Jan 2017 16:10:28 +0100 Subject: KVM: arm/arm64: Move timer save/restore out of the hyp code As we are about to be lazy with saving and restoring the timer registers, we prepare by moving all possible timer configuration logic out of the hyp code. All virtual timer registers can be programmed from EL1 and since the arch timer is always a level triggered interrupt we can safely do this with interrupts disabled in the host kernel on the way to the guest without taking vtimer interrupts in the host kernel (yet). The downside is that the cntvoff register can only be programmed from hyp mode, so we jump into hyp mode and back to program it. This is also safe, because the host kernel doesn't use the virtual timer in the KVM code. It may add a little performance performance penalty, but only until following commits where we move this operation to vcpu load/put. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 48 +++++++++++++++++++++++++++++ virt/kvm/arm/hyp/timer-sr.c | 74 ++++++++++++++++++++------------------------- 2 files changed, 80 insertions(+), 42 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4ad853737c34..93c8973a71f4 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -271,6 +271,20 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu, soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx)); } +static void timer_save_state(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (timer->enabled) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); + } + + /* Disable the virtual timer */ + write_sysreg_el0(0, cntv_ctl); +} + /* * Schedule the background timer before calling kvm_vcpu_block, so that this * thread is removed from its waitqueue and made runnable when there's a timer @@ -304,6 +318,18 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); } +static void timer_restore_state(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (timer->enabled) { + write_sysreg_el0(vtimer->cnt_cval, cntv_cval); + isb(); + write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); + } +} + void kvm_timer_unschedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -311,6 +337,21 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) soft_timer_cancel(&timer->bg_timer, &timer->expired); } +static void set_cntvoff(u64 cntvoff) +{ + u32 low = lower_32_bits(cntvoff); + u32 high = upper_32_bits(cntvoff); + + /* + * Since kvm_call_hyp doesn't fully support the ARM PCS especially on + * 32-bit systems, but rather passes register by register shifted one + * place (we put the function address in r0/x0), we cannot simply pass + * a 64-bit value as an argument, but have to split the value in two + * 32-bit halves. + */ + kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); +} + static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); @@ -414,6 +455,7 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); if (unlikely(!timer->enabled)) return; @@ -427,6 +469,9 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) kvm_timer_flush_hwstate_user(vcpu); else kvm_timer_flush_hwstate_vgic(vcpu); + + set_cntvoff(vtimer->cntvoff); + timer_restore_state(vcpu); } /** @@ -446,6 +491,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) */ soft_timer_cancel(&timer->phys_timer, NULL); + timer_save_state(vcpu); + set_cntvoff(0); + /* * The guest could have modified the timer registers or the timer * could have expired, update the timer state. diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index 4734915ab71f..f39861639f08 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -21,58 +21,48 @@ #include -/* vcpu is already in the HYP VA space */ -void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) +{ + u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low; + write_sysreg(cntvoff, cntvoff_el2); +} + +void __hyp_text enable_el1_phys_timer_access(void) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); u64 val; - if (timer->enabled) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - } + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); +} - /* Disable the virtual timer */ - write_sysreg_el0(0, cntv_ctl); +void __hyp_text disable_el1_phys_timer_access(void) +{ + u64 val; + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); +} + +void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +{ /* * We don't need to do this for VHE since the host kernel runs in EL2 * with HCR_EL2.TGE ==1, which makes those bits have no impact. */ - if (!has_vhe()) { - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - } - - /* Clear cntvoff for the host */ - write_sysreg(0, cntvoff_el2); + if (!has_vhe()) + enable_el1_phys_timer_access(); } -void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - u64 val; - - /* Those bits are already configured at boot on VHE-system */ - if (!has_vhe()) { - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - } - - if (timer->enabled) { - write_sysreg(vtimer->cntvoff, cntvoff_el2); - write_sysreg_el0(vtimer->cnt_cval, cntv_cval); - isb(); - write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); - } + if (!has_vhe()) + disable_el1_phys_timer_access(); } -- cgit From 40f4cba9a579fe7ad1431269db8aec745c290ba0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 5 Jul 2017 12:50:27 +0200 Subject: KVM: arm/arm64: Set VCPU affinity for virt timer irq As we are about to take physical interrupts for the virtual timer on the host but want to leave those active while running the VM (and let the VM deactivate them), we need to set the vtimer PPI affinity accordingly. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 93c8973a71f4..eac1b3d83a86 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -649,11 +649,20 @@ int kvm_timer_hyp_init(void) return err; } + err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, "kvm/arm/timer:starting", kvm_timer_starting_cpu, kvm_timer_dying_cpu); + return 0; +out_free_irq: + free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); return err; } -- cgit From b103cc3f10c06fb81faacd4ee6f88bbd21246073 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 16 Oct 2016 20:30:38 +0200 Subject: KVM: arm/arm64: Avoid timer save/restore in vcpu entry/exit We don't need to save and restore the hardware timer state and examine if it generates interrupts on on every entry/exit to the guest. The timer hardware is perfectly capable of telling us when it has expired by signaling interrupts. When taking a vtimer interrupt in the host, we don't want to mess with the timer configuration, we just want to forward the physical interrupt to the guest as a virtual interrupt. We can use the split priority drop and deactivate feature of the GIC to do this, which leaves an EOI'ed interrupt active on the physical distributor, making sure we don't keep taking timer interrupts which would prevent the guest from running. We can then forward the physical interrupt to the VM using the HW bit in the LR of the GIC, like we do already, which lets the guest directly deactivate both the physical and virtual timer simultaneously, allowing the timer hardware to exit the VM and generate a new physical interrupt when the timer output is again asserted later on. We do need to capture this state when migrating VCPUs between physical CPUs, however, which we use the vcpu put/load functions for, which are called through preempt notifiers whenever the thread is scheduled away from the CPU or called directly if we return from the ioctl to userspace. One caveat is that we have to save and restore the timer state in both kvm_timer_vcpu_[put/load] and kvm_timer_[schedule/unschedule], because we can have the following flows: 1. kvm_vcpu_block 2. kvm_timer_schedule 3. schedule 4. kvm_timer_vcpu_put (preempt notifier) 5. schedule (vcpu thread gets scheduled back) 6. kvm_timer_vcpu_load (preempt notifier) 7. kvm_timer_unschedule And a version where we don't actually call schedule: 1. kvm_vcpu_block 2. kvm_timer_schedule 7. kvm_timer_unschedule Since kvm_timer_[schedule/unschedule] may not be followed by put/load, but put/load also may be called independently, we call the timer save/restore functions from both paths. Since they rely on the loaded flag to never save/restore when unnecessary, this doesn't cause any harm, and we ensure that all invokations of either set of functions work as intended. An added benefit beyond not having to read and write the timer sysregs on every entry and exit is that we no longer have to actively write the active state to the physical distributor, because we configured the irq for the vtimer to only get a priority drop when handling the interrupt in the GIC driver (we called irq_set_vcpu_affinity()), and the interrupt stays active after firing on the host. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 237 +++++++++++++++++++++++++++++----------------- virt/kvm/arm/arm.c | 19 +++- 2 files changed, 164 insertions(+), 92 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index eac1b3d83a86..ec685c1f3b78 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -46,10 +46,9 @@ static const struct kvm_irq_level default_vtimer_irq = { .level = 1, }; -void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) -{ - vcpu_vtimer(vcpu)->active_cleared_last = false; -} +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx); u64 kvm_phys_timer_read(void) { @@ -69,17 +68,45 @@ static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work) cancel_work_sync(work); } -static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) +static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); /* - * We disable the timer in the world switch and let it be - * handled by kvm_timer_sync_hwstate(). Getting a timer - * interrupt at this point is a sure sign of some major - * breakage. + * When using a userspace irqchip with the architected timers, we must + * prevent continuously exiting from the guest, and therefore mask the + * physical interrupt by disabling it on the host interrupt controller + * when the virtual level is high, such that the guest can make + * forward progress. Once we detect the output level being + * de-asserted, we unmask the interrupt again so that we exit from the + * guest when the timer fires. */ - pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, 0); +} + +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) +{ + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + struct arch_timer_context *vtimer; + + if (!vcpu) { + pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); + return IRQ_NONE; + } + vtimer = vcpu_vtimer(vcpu); + + if (!vtimer->irq.level) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + if (kvm_timer_irq_can_fire(vtimer)) + kvm_timer_update_irq(vcpu, true, vtimer); + } + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_vtimer_update_mask_user(vcpu); + return IRQ_HANDLED; } @@ -215,7 +242,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, { int ret; - timer_ctx->active_cleared_last = false; timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); @@ -271,10 +297,16 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu, soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx)); } -static void timer_save_state(struct kvm_vcpu *vcpu) +static void vtimer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + unsigned long flags; + + local_irq_save(flags); + + if (!vtimer->loaded) + goto out; if (timer->enabled) { vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); @@ -283,6 +315,10 @@ static void timer_save_state(struct kvm_vcpu *vcpu) /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); + + vtimer->loaded = false; +out: + local_irq_restore(flags); } /* @@ -296,6 +332,8 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + vtimer_save_state(vcpu); + /* * No need to schedule a background timer if any guest timer has * already expired, because kvm_vcpu_block will return before putting @@ -318,22 +356,34 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); } -static void timer_restore_state(struct kvm_vcpu *vcpu) +static void vtimer_restore_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + unsigned long flags; + + local_irq_save(flags); + + if (vtimer->loaded) + goto out; if (timer->enabled) { write_sysreg_el0(vtimer->cnt_cval, cntv_cval); isb(); write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl); } + + vtimer->loaded = true; +out: + local_irq_restore(flags); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + vtimer_restore_state(vcpu); + soft_timer_cancel(&timer->bg_timer, &timer->expired); } @@ -352,61 +402,45 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); } -static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) +static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; - /* - * If we enter the guest with the virtual input level to the VGIC - * asserted, then we have already told the VGIC what we need to, and - * we don't need to exit from the guest until the guest deactivates - * the already injected interrupt, so therefore we should set the - * hardware active state to prevent unnecessary exits from the guest. - * - * Also, if we enter the guest with the virtual timer interrupt active, - * then it must be active on the physical distributor, because we set - * the HW bit and the guest must be able to deactivate the virtual and - * physical interrupt at the same time. - * - * Conversely, if the virtual input level is deasserted and the virtual - * interrupt is not active, then always clear the hardware active state - * to ensure that hardware interrupts from the timer triggers a guest - * exit. - */ phys_active = vtimer->irq.level || - kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); - - /* - * We want to avoid hitting the (re)distributor as much as - * possible, as this is a potentially expensive MMIO access - * (not to mention locks in the irq layer), and a solution for - * this is to cache the "active" state in memory. - * - * Things to consider: we cannot cache an "active set" state, - * because the HW can change this behind our back (it becomes - * "clear" in the HW). We must then restrict the caching to - * the "clear" state. - * - * The cache is invalidated on: - * - vcpu put, indicating that the HW cannot be trusted to be - * in a sane state on the next vcpu load, - * - any change in the interrupt state - * - * Usage conditions: - * - cached value is "active clear" - * - value to be programmed is "active clear" - */ - if (vtimer->active_cleared_last && !phys_active) - return; + kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); ret = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, phys_active); WARN_ON(ret); +} - vtimer->active_cleared_last = !phys_active; +static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu) +{ + kvm_vtimer_update_mask_user(vcpu); +} + +void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (unlikely(!timer->enabled)) + return; + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_timer_vcpu_load_user(vcpu); + else + kvm_timer_vcpu_load_vgic(vcpu); + + set_cntvoff(vtimer->cntvoff); + + vtimer_restore_state(vcpu); + + if (has_vhe()) + disable_el1_phys_timer_access(); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -426,23 +460,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) ptimer->irq.level != plevel; } -static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) -{ - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - - /* - * To prevent continuously exiting from the guest, we mask the - * physical interrupt such that the guest can make forward progress. - * Once we detect the output level being deasserted, we unmask the - * interrupt again so that we exit from the guest when the timer - * fires. - */ - if (vtimer->irq.level) - disable_percpu_irq(host_vtimer_irq); - else - enable_percpu_irq(host_vtimer_irq, 0); -} - /** * kvm_timer_flush_hwstate - prepare timers before running the vcpu * @vcpu: The vcpu pointer @@ -455,23 +472,61 @@ static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); if (unlikely(!timer->enabled)) return; - kvm_timer_update_state(vcpu); + if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) + kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); /* Set the background timer for the physical timer emulation. */ phys_timer_emulate(vcpu, vcpu_ptimer(vcpu)); +} - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - kvm_timer_flush_hwstate_user(vcpu); - else - kvm_timer_flush_hwstate_vgic(vcpu); +void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - set_cntvoff(vtimer->cntvoff); - timer_restore_state(vcpu); + if (unlikely(!timer->enabled)) + return; + + if (has_vhe()) + enable_el1_phys_timer_access(); + + vtimer_save_state(vcpu); + + /* + * The kernel may decide to run userspace after calling vcpu_put, so + * we reset cntvoff to 0 to ensure a consistent read between user + * accesses to the virtual counter and kernel access to the physical + * counter. + */ + set_cntvoff(0); +} + +static void unmask_vtimer_irq(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { + kvm_vtimer_update_mask_user(vcpu); + return; + } + + /* + * If the guest disabled the timer without acking the interrupt, then + * we must make sure the physical and virtual active states are in + * sync by deactivating the physical interrupt, because otherwise we + * wouldn't see the next timer interrupt in the host. + */ + if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) { + int ret; + ret = irq_set_irqchip_state(host_vtimer_irq, + IRQCHIP_STATE_ACTIVE, + false); + WARN_ON(ret); + } } /** @@ -484,6 +539,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); /* * This is to cancel the background timer for the physical timer @@ -491,14 +547,19 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) */ soft_timer_cancel(&timer->phys_timer, NULL); - timer_save_state(vcpu); - set_cntvoff(0); - /* - * The guest could have modified the timer registers or the timer - * could have expired, update the timer state. + * If we entered the guest with the vtimer output asserted we have to + * check if the guest has modified the timer so that we should lower + * the line at this point. */ - kvm_timer_update_state(vcpu); + if (vtimer->irq.level) { + vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); + vtimer->cnt_cval = read_sysreg_el0(cntv_cval); + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + unmask_vtimer_irq(vcpu); + } + } } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 27db222a0c8d..132d39ae13d2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -354,18 +354,18 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); kvm_arm_set_running_vcpu(vcpu); - kvm_vgic_load(vcpu); + kvm_timer_vcpu_load(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); vcpu->cpu = -1; kvm_arm_set_running_vcpu(NULL); - kvm_timer_vcpu_put(vcpu); } static void vcpu_power_off(struct kvm_vcpu *vcpu) @@ -710,15 +710,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_arm_clear_debug(vcpu); /* - * We must sync the PMU and timer state before the vgic state so + * We must sync the PMU state before the vgic state so * that the vgic can properly sample the updated state of the * interrupt line. */ kvm_pmu_sync_hwstate(vcpu); - kvm_timer_sync_hwstate(vcpu); + /* + * Sync the vgic state before syncing the timer state because + * the timer code needs to know if the virtual timer + * interrupts are active. + */ kvm_vgic_sync_hwstate(vcpu); + /* + * Sync the timer hardware state before enabling interrupts as + * we don't want vtimer interrupts to race with syncing the + * timer virtual interrupt state. + */ + kvm_timer_sync_hwstate(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still -- cgit From 5c5196da4e966cc23af6a1576ad2a9e45bed3f99 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 16 Jun 2017 23:08:57 -0700 Subject: KVM: arm/arm64: Support EL1 phys timer register access in set/get reg Add suport for the physical timer registers in kvm_arm_timer_set_reg and kvm_arm_timer_get_reg so that these functions can be reused to interact with the rest of the system. Note that this paves part of the way for the physical timer state save/restore, but we still need to add those registers to KVM_GET_REG_LIST before we support migrating the physical timer state. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index ec685c1f3b78..d1a6fb12121f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -628,10 +628,11 @@ static void kvm_timer_init_interrupt(void *info) int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - vtimer->cnt_ctl = value; + vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; break; case KVM_REG_ARM_TIMER_CNT: update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); @@ -639,6 +640,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) case KVM_REG_ARM_TIMER_CVAL: vtimer->cnt_cval = value; break; + case KVM_REG_ARM_PTIMER_CTL: + ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT; + break; + case KVM_REG_ARM_PTIMER_CVAL: + ptimer->cnt_cval = value; + break; + default: return -1; } @@ -647,17 +655,38 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) return 0; } +static u64 read_timer_ctl(struct arch_timer_context *timer) +{ + /* + * Set ISTATUS bit if it's expired. + * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is + * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit + * regardless of ENABLE bit for our implementation convenience. + */ + if (!kvm_timer_compute_delta(timer)) + return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; + else + return timer->cnt_ctl; +} + u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) { + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); switch (regid) { case KVM_REG_ARM_TIMER_CTL: - return vtimer->cnt_ctl; + return read_timer_ctl(vtimer); case KVM_REG_ARM_TIMER_CNT: return kvm_phys_timer_read() - vtimer->cntvoff; case KVM_REG_ARM_TIMER_CVAL: return vtimer->cnt_cval; + case KVM_REG_ARM_PTIMER_CTL: + return read_timer_ctl(ptimer); + case KVM_REG_ARM_PTIMER_CVAL: + return ptimer->cnt_cval; + case KVM_REG_ARM_PTIMER_CNT: + return kvm_phys_timer_read(); } return (u64)-1; } -- cgit From cda93b7aa4657f2a9df28d56df8259226be8b0e9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 18 Jun 2017 01:41:06 -0700 Subject: KVM: arm/arm64: Move phys_timer_emulate function We are about to call phys_timer_emulate() from kvm_timer_update_state() and modify phys_timer_emulate() at the same time. Moving the function and modifying it in a single patch makes the diff hard to read, so do this separately first. No functional change. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index d1a6fb12121f..7f4f12d48a1a 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -255,6 +255,22 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, } } +/* Schedule the background timer for the emulated timer. */ +static void phys_timer_emulate(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer_ctx) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + if (kvm_timer_should_fire(timer_ctx)) + return; + + if (!kvm_timer_irq_can_fire(timer_ctx)) + return; + + /* The timer has not yet expired, schedule a background timer */ + soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx)); +} + /* * Check if there was a change in the timer state (should we raise or lower * the line level to the GIC). @@ -281,22 +297,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); } -/* Schedule the background timer for the emulated timer. */ -static void phys_timer_emulate(struct kvm_vcpu *vcpu, - struct arch_timer_context *timer_ctx) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - if (kvm_timer_should_fire(timer_ctx)) - return; - - if (!kvm_timer_irq_can_fire(timer_ctx)) - return; - - /* The timer has not yet expired, schedule a background timer */ - soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx)); -} - static void vtimer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; -- cgit From bbdd52cfcba290560909498c7d5681f19894587b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 18 Jun 2017 01:42:55 -0700 Subject: KVM: arm/arm64: Avoid phys timer emulation in vcpu entry/exit There is no need to schedule and cancel a hrtimer when entering and exiting the guest, because we know when the physical timer is going to fire when the guest programs it, and we can simply program the hrtimer at that point. Now when the register modifications from the guest go through the kvm_arm_timer_set/get_reg functions, which always call kvm_timer_update_state(), we can simply consider the timer state in this function and schedule and cancel the timers as needed. This avoids looking at the physical timer emulation state when entering and exiting the VCPU, allowing for faster servicing of the VM when needed. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 75 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 24 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 7f4f12d48a1a..20e56c420632 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -202,7 +202,27 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) { - WARN(1, "Timer only used to ensure guest exit - unexpected event."); + struct arch_timer_context *ptimer; + struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + + timer = container_of(hrt, struct arch_timer_cpu, phys_timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + ptimer = vcpu_ptimer(vcpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If not ready, schedule for a later time. + */ + ns = kvm_timer_compute_delta(ptimer); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + + kvm_timer_update_irq(vcpu, true, ptimer); return HRTIMER_NORESTART; } @@ -256,24 +276,28 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, } /* Schedule the background timer for the emulated timer. */ -static void phys_timer_emulate(struct kvm_vcpu *vcpu, - struct arch_timer_context *timer_ctx) +static void phys_timer_emulate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - if (kvm_timer_should_fire(timer_ctx)) - return; - - if (!kvm_timer_irq_can_fire(timer_ctx)) + /* + * If the timer can fire now we have just raised the IRQ line and we + * don't need to have a soft timer scheduled for the future. If the + * timer cannot fire at all, then we also don't need a soft timer. + */ + if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) { + soft_timer_cancel(&timer->phys_timer, NULL); return; + } - /* The timer has not yet expired, schedule a background timer */ - soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(timer_ctx)); + soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer)); } /* - * Check if there was a change in the timer state (should we raise or lower - * the line level to the GIC). + * Check if there was a change in the timer state, so that we should either + * raise or lower the line level to the GIC or schedule a background timer to + * emulate the physical timer. */ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) { @@ -295,6 +319,8 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); + + phys_timer_emulate(vcpu); } static void vtimer_save_state(struct kvm_vcpu *vcpu) @@ -441,6 +467,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) if (has_vhe()) disable_el1_phys_timer_access(); + + /* Set the background timer for the physical timer emulation. */ + phys_timer_emulate(vcpu); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -476,12 +505,6 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) if (unlikely(!timer->enabled)) return; - - if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) - kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); - - /* Set the background timer for the physical timer emulation. */ - phys_timer_emulate(vcpu, vcpu_ptimer(vcpu)); } void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) @@ -496,6 +519,17 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) vtimer_save_state(vcpu); + /* + * Cancel the physical timer emulation, because the only case where we + * need it after a vcpu_put is in the context of a sleeping VCPU, and + * in that case we already factor in the deadline for the physical + * timer when scheduling the bg_timer. + * + * In any case, we re-schedule the hrtimer for the physical timer when + * coming back to the VCPU thread in kvm_timer_vcpu_load(). + */ + soft_timer_cancel(&timer->phys_timer, NULL); + /* * The kernel may decide to run userspace after calling vcpu_put, so * we reset cntvoff to 0 to ensure a consistent read between user @@ -538,15 +572,8 @@ static void unmask_vtimer_irq(struct kvm_vcpu *vcpu) */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - /* - * This is to cancel the background timer for the physical timer - * emulation if it is set. - */ - soft_timer_cancel(&timer->phys_timer, NULL); - /* * If we entered the guest with the vtimer output asserted we have to * check if the guest has modified the timer so that we should lower -- cgit From 7e90c8e5704cbb299d48e7debb1e61614cb12f41 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 20 Jun 2017 07:56:20 -0700 Subject: KVM: arm/arm64: Get rid of kvm_timer_flush_hwstate Now when both the vtimer and the ptimer when using both the in-kernel vgic emulation and a userspace IRQ chip are driven by the timer signals and at the vcpu load/put boundaries, instead of recomputing the timer state at every entry/exit to/from the guest, we can get entirely rid of the flush hwstate function. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 24 ------------------------ virt/kvm/arm/arm.c | 1 - 2 files changed, 25 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 20e56c420632..53d9bd4a734f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -305,12 +305,6 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - /* - * If userspace modified the timer registers via SET_ONE_REG before - * the vgic was initialized, we mustn't set the vtimer->irq.level value - * because the guest would never see the interrupt. Instead wait - * until we call this function from kvm_timer_flush_hwstate. - */ if (unlikely(!timer->enabled)) return; @@ -489,24 +483,6 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) ptimer->irq.level != plevel; } -/** - * kvm_timer_flush_hwstate - prepare timers before running the vcpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the host, - * and inject an interrupt if that was the case, making sure the timer is - * masked or disabled on the host so that we keep executing. Also schedule a - * software timer for the physical timer if it is enabled. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - - if (unlikely(!timer->enabled)) - return; -} - void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 132d39ae13d2..14c50d142c67 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -656,7 +656,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_disable(); - kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); /* -- cgit From 1c88ab7ec8c53c4d806bb2b6871ddafdebbffa8b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 6 Jan 2017 16:07:48 +0100 Subject: KVM: arm/arm64: Rework kvm_timer_should_fire kvm_timer_should_fire() can be called in two different situations from the kvm_vcpu_block(). The first case is before calling kvm_timer_schedule(), used for wait polling, and in this case the VCPU thread is running and the timer state is loaded onto the hardware so all we have to do is check if the virtual interrupt lines are asserted, becasue the timer interrupt handler functions will raise those lines as appropriate. The second case is inside the wait loop of kvm_vcpu_block(), where we have already called kvm_timer_schedule() and therefore the hardware will be disabled and the software view of the timer state is up to date (timer->loaded is false), and so we can simply check if the timer should fire by looking at the software state. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 22 +++++++++++++++++++++- virt/kvm/arm/arm.c | 3 +-- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 53d9bd4a734f..2035cf251701 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -49,6 +49,7 @@ static const struct kvm_irq_level default_vtimer_irq = { static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx); +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); u64 kvm_phys_timer_read(void) { @@ -226,7 +227,7 @@ static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } -bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { u64 cval, now; @@ -239,6 +240,25 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) return cval <= now; } +bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + if (vtimer->irq.level || ptimer->irq.level) + return true; + + /* + * When this is called from withing the wait loop of kvm_vcpu_block(), + * the software view of the timer state is up to date (timer->loaded + * is false), and so we can simply check if the timer should fire now. + */ + if (!vtimer->loaded && kvm_timer_should_fire(vtimer)) + return true; + + return kvm_timer_should_fire(ptimer); +} + /* * Reflect the timer output level into the kvm_run structure */ diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 14c50d142c67..bc126fb99a3d 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -307,8 +307,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_timer_should_fire(vcpu_vtimer(vcpu)) || - kvm_timer_should_fire(vcpu_ptimer(vcpu)); + return kvm_timer_is_pending(vcpu); } void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -- cgit From 4a2c4da1250d09c1477eff8e14a82b2573481f32 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 29 Oct 2017 02:04:55 +0100 Subject: arm/arm64: KVM: Load the timer state when enabling the timer After being lazy with saving/restoring the timer state, we defer that work to vcpu_load and vcpu_put, which ensure that the timer state is loaded on the hardware timers whenever the VCPU runs. Unfortunately, we are failing to do that the first time vcpu_load() runs, because the timer has not yet been enabled at that time. As long as the initialized timer state matches what happens to be in the hardware (a disabled timer, because we never leave the timer screaming), this does not show up as a problem, but is nevertheless incorrect. The solution is simple; disable preemption while setting the timer to be enabled, and call the timer load function when first enabling the timer. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 2035cf251701..4db54ff08d9e 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -861,7 +861,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return ret; no_vgic: + preempt_disable(); timer->enabled = 1; + kvm_timer_vcpu_load_vgic(vcpu); + preempt_enable(); + return 0; } -- cgit From 0a0d389ea63ced83e0e6da2b78c964b9cb3764ba Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 26 Oct 2017 17:23:07 +0200 Subject: KVM: arm/arm64: vgic-its: Remove kvm_its_unmap_device Let's remove kvm_its_unmap_device and use kvm_its_free_device as both functions are identical. Signed-off-by: Eric Auger Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 9f5e34767628..75c18183eee3 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -897,7 +897,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, } /* Requires the its_lock to be held. */ -static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) +static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) { struct its_ite *ite, *temp; @@ -960,7 +960,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, * by removing the mapping and re-establishing it. */ if (device) - vgic_its_unmap_device(kvm, device); + vgic_its_free_device(kvm, device); /* * The spec does not say whether unmapping a not-mapped device @@ -1615,16 +1615,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) return vgic_its_set_abi(its, NR_ITS_ABIS - 1); } -static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev) -{ - struct its_ite *ite, *tmp; - - list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list) - its_free_ite(kvm, ite); - list_del(&dev->dev_list); - kfree(dev); -} - static void vgic_its_destroy(struct kvm_device *kvm_dev) { struct kvm *kvm = kvm_dev->kvm; -- cgit From 2f609a03391f24ac31f12f27790194ac49dff832 Mon Sep 17 00:00:00 2001 From: wanghaibin Date: Thu, 26 Oct 2017 17:23:08 +0200 Subject: KVM: arm/arm64: vgic-its: New helper functions to free the caches We create two new functions that free the device and collection lists. They are currently called by vgic_its_destroy() and other callers will be added in subsequent patches. We also remove the check on its->device_list.next. Lists are initialized in vgic_create_its() and the device is added to the device list only if this latter succeeds. vgic_its_destroy is the device destroy ops. This latter is called by kvm_destroy_devices() which loops on all created devices. So at this point the list is initialized. Acked-by: Marc Zyngier Signed-off-by: wanghaibin Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 75c18183eee3..d46256c07ba5 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -913,6 +913,24 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) kfree(device); } +/* its lock must be held */ +static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_device *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->device_list, dev_list) + vgic_its_free_device(kvm, cur); +} + +/* its lock must be held */ +static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_collection *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list) + vgic_its_free_collection(its, cur->collection_id); +} + /* Must be called with its_lock mutex held */ static struct its_device *vgic_its_alloc_device(struct vgic_its *its, u32 device_id, gpa_t itt_addr, @@ -1619,32 +1637,13 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev) { struct kvm *kvm = kvm_dev->kvm; struct vgic_its *its = kvm_dev->private; - struct list_head *cur, *temp; - - /* - * We may end up here without the lists ever having been initialized. - * Check this and bail out early to avoid dereferencing a NULL pointer. - */ - if (!its->device_list.next) - return; mutex_lock(&its->its_lock); - list_for_each_safe(cur, temp, &its->device_list) { - struct its_device *dev; - dev = list_entry(cur, struct its_device, dev_list); - vgic_its_free_device(kvm, dev); - } - - list_for_each_safe(cur, temp, &its->collection_list) { - struct its_collection *coll; + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); - coll = list_entry(cur, struct its_collection, coll_list); - list_del(cur); - kfree(coll); - } mutex_unlock(&its->its_lock); - kfree(its); } -- cgit From 36d6961c2b481614e8d37495896436d9322df052 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 26 Oct 2017 17:23:09 +0200 Subject: KVM: arm/arm64: vgic-its: Free caches when GITS_BASER Valid bit is cleared When the GITS_BASER.Valid gets cleared, the data structures in guest RAM are not valid anymore. The device, collection and LPI lists stored in the in-kernel ITS represent the same information in some form of cache. So let's void the cache. Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index d46256c07ba5..1732e08a4375 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1431,7 +1431,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, unsigned long val) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); - u64 entry_size, device_type; + u64 entry_size, table_type; u64 reg, *regptr, clearbits = 0; /* When GITS_CTLR.Enable is 1, we ignore write accesses. */ @@ -1442,12 +1442,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, case 0: regptr = &its->baser_device_table; entry_size = abi->dte_esz; - device_type = GITS_BASER_TYPE_DEVICE; + table_type = GITS_BASER_TYPE_DEVICE; break; case 1: regptr = &its->baser_coll_table; entry_size = abi->cte_esz; - device_type = GITS_BASER_TYPE_COLLECTION; + table_type = GITS_BASER_TYPE_COLLECTION; clearbits = GITS_BASER_INDIRECT; break; default: @@ -1459,10 +1459,24 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, reg &= ~clearbits; reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; - reg |= device_type << GITS_BASER_TYPE_SHIFT; + reg |= table_type << GITS_BASER_TYPE_SHIFT; reg = vgic_sanitise_its_baser(reg); *regptr = reg; + + if (!(reg & GITS_BASER_VALID)) { + /* Take the its_lock to prevent a race with a save/restore */ + mutex_lock(&its->its_lock); + switch (table_type) { + case GITS_BASER_TYPE_DEVICE: + vgic_its_free_device_list(kvm, its); + break; + case GITS_BASER_TYPE_COLLECTION: + vgic_its_free_collection_list(kvm, its); + break; + } + mutex_unlock(&its->its_lock); + } } static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, -- cgit From 3eb4271b4ab6d38a3c113a19f358f606702e08ef Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 26 Oct 2017 17:23:11 +0200 Subject: KVM: arm/arm64: vgic-its: Implement KVM_DEV_ARM_ITS_CTRL_RESET On reset we clear the valid bits of GITS_CBASER and GITS_BASER. We also clear command queue registers and free the cache (device, collection, and lpi lists). As we need to take the same locks as save/restore functions, we create a vgic_its_ctrl() wrapper that handles KVM_DEV_ARM_VGIC_GRP_CTRL group functions. Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-its.c | 105 +++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 49 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1732e08a4375..40791c121710 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -2273,29 +2273,13 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) */ static int vgic_its_save_tables_v0(struct vgic_its *its) { - struct kvm *kvm = its->dev->kvm; int ret; - mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); - - if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - return -EBUSY; - } - ret = vgic_its_save_device_tables(its); if (ret) - goto out; - - ret = vgic_its_save_collection_table(its); + return ret; -out: - unlock_all_vcpus(kvm); - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - return ret; + return vgic_its_save_collection_table(its); } /** @@ -2305,29 +2289,13 @@ out: */ static int vgic_its_restore_tables_v0(struct vgic_its *its) { - struct kvm *kvm = its->dev->kvm; int ret; - mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); - - if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); - return -EBUSY; - } - ret = vgic_its_restore_collection_table(its); if (ret) - goto out; - - ret = vgic_its_restore_device_tables(its); -out: - unlock_all_vcpus(kvm); - mutex_unlock(&its->its_lock); - mutex_unlock(&kvm->lock); + return ret; - return ret; + return vgic_its_restore_device_tables(its); } static int vgic_its_commit_v0(struct vgic_its *its) @@ -2346,6 +2314,19 @@ static int vgic_its_commit_v0(struct vgic_its *its) return 0; } +static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its) +{ + /* We need to keep the ABI specific field values */ + its->baser_coll_table &= ~GITS_BASER_VALID; + its->baser_device_table &= ~GITS_BASER_VALID; + its->cbaser = 0; + its->creadr = 0; + its->cwriter = 0; + its->enabled = 0; + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); +} + static int vgic_its_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -2360,6 +2341,8 @@ static int vgic_its_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: return 0; + case KVM_DEV_ARM_ITS_CTRL_RESET: + return 0; case KVM_DEV_ARM_ITS_SAVE_TABLES: return 0; case KVM_DEV_ARM_ITS_RESTORE_TABLES: @@ -2372,6 +2355,41 @@ static int vgic_its_has_attr(struct kvm_device *dev, return -ENXIO; } +static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ret = 0; + + if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ + return 0; + + mutex_lock(&kvm->lock); + mutex_lock(&its->its_lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + switch (attr) { + case KVM_DEV_ARM_ITS_CTRL_RESET: + vgic_its_reset(kvm, its); + break; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + ret = abi->save_tables(its); + break; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + ret = abi->restore_tables(its); + break; + } + + unlock_all_vcpus(kvm); + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return ret; +} + static int vgic_its_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -2397,19 +2415,8 @@ static int vgic_its_set_attr(struct kvm_device *dev, return vgic_register_its_iodev(dev->kvm, its, addr); } - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - const struct vgic_its_abi *abi = vgic_its_get_abi(its); - - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - /* Nothing to do */ - return 0; - case KVM_DEV_ARM_ITS_SAVE_TABLES: - return abi->save_tables(its); - case KVM_DEV_ARM_ITS_RESTORE_TABLES: - return abi->restore_tables(its); - } - } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + return vgic_its_ctrl(dev->kvm, its, attr->attr); case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { u64 __user *uaddr = (u64 __user *)(long)attr->addr; u64 reg; -- cgit From 74a64a981662ab34289b3c90f6f964aa38ec1d9f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 29 Oct 2017 02:18:09 +0000 Subject: KVM: arm/arm64: Unify 32bit fault injection Both arm and arm64 implementations are capable of injecting faults, and yet have completely divergent implementations, leading to different bugs and reduced maintainability. Let's elect the arm64 version as the canonical one and move it into aarch32.c, which is common to both architectures. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/aarch32.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 79c7c357804b..8bc479fa37e6 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -25,11 +25,6 @@ #include #include -#ifndef CONFIG_ARM64 -#define COMPAT_PSR_T_BIT PSR_T_BIT -#define COMPAT_PSR_IT_MASK PSR_IT_MASK -#endif - /* * stolen from arch/arm/kernel/opcodes.c * @@ -150,3 +145,95 @@ void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) *vcpu_pc(vcpu) += 4; kvm_adjust_itstate(vcpu); } + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT); + u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr & (1 << 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr & (1 << 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) + *fsr = 1 << 9 | 0x34; + else + *fsr = 0x14; +} + +void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, false, addr); +} + +void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, true, addr); +} -- cgit