Diffstat (limited to 'arch/x86/kvm/ioapic.c')
-rw-r--r--  arch/x86/kvm/ioapic.c  355
1 file changed, 231 insertions(+), 124 deletions(-)
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 4e822ad363f3..2c2783296aed 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
 /*
  * Copyright (C) 2001 MandrakeSoft S.A.
  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
@@ -8,24 +9,11 @@
  * http://www.linux-mandrake.com/
  * http://www.mandrakesoft.com/
  *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
  * Yunhong Jiang <yunhong.jiang@intel.com>
  * Yaozu (Eddie) Dong <eddie.dong@intel.com>
  * Based on Xen 3.1 code.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
@@ -36,26 +24,25 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/nospec.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/current.h>
-#include <trace/events/kvm.h>
 
 #include "ioapic.h"
 #include "lapic.h"
 #include "irq.h"
+#include "trace.h"
 
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
 static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
                           bool line_status);
 
-static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
-                                          unsigned long addr,
-                                          unsigned long length)
+static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
+                                      struct kvm_ioapic *ioapic,
+                                      int trigger_mode,
+                                      int pin);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic)
 {
         unsigned long result = 0;
 
@@ -73,13 +60,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
         default:
                 {
                         u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
-                        u64 redir_content;
+                        u64 redir_content = ~0ULL;
 
-                        if (redir_index < IOAPIC_NUM_PINS)
-                                redir_content =
-                                        ioapic->redirtbl[redir_index].bits;
-                        else
-                                redir_content = ~0ULL;
+                        if (redir_index < IOAPIC_NUM_PINS) {
+                                u32 index = array_index_nospec(
+                                        redir_index, IOAPIC_NUM_PINS);
+
+                                redir_content = ioapic->redirtbl[index].bits;
+                        }
 
                         result = (ioapic->ioregsel & 0x1) ?
                             (redir_content >> 32) & 0xffffffff :
@@ -94,7 +82,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
         ioapic->rtc_status.pending_eoi = 0;
-        bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+        bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
@@ -113,8 +101,9 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
         union kvm_ioapic_redirect_entry *e;
 
         e = &ioapic->redirtbl[RTC_GSI];
-        if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id,
-                                 e->fields.dest_mode))
+        if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
+                                 e->fields.dest_id,
+                                 kvm_lapic_irq_dest_mode(!!e->fields.dest_mode)))
                 return;
 
         new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
@@ -146,7 +135,7 @@ void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
 {
         struct kvm_vcpu *vcpu;
-        int i;
+        unsigned long i;
 
         if (RTC_GSI >= IOAPIC_NUM_PINS)
                 return;
@@ -156,10 +145,16 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
                 __rtc_irq_eoi_tracking_restore_one(vcpu);
 }
 
-static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
+static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu,
+                        int vector)
 {
-        if (test_and_clear_bit(vcpu->vcpu_id,
-                               ioapic->rtc_status.dest_map.map)) {
+        struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
+
+        /* RTC special handling */
+        if (test_bit(vcpu->vcpu_id, dest_map->map) &&
+            (vector == dest_map->vectors[vcpu->vcpu_id]) &&
+            (test_and_clear_bit(vcpu->vcpu_id,
+                                ioapic->rtc_status.dest_map.map))) {
                 --ioapic->rtc_status.pending_eoi;
                 rtc_status_pending_eoi_check_valid(ioapic);
         }
@@ -173,6 +168,28 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
         return false;
 }
 
+static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq)
+{
+        unsigned long i;
+        struct kvm_vcpu *vcpu;
+        union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+
+        kvm_for_each_vcpu(i, vcpu, ioapic->kvm) {
+                if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
+                                         entry->fields.dest_id,
+                                         entry->fields.dest_mode) ||
+                    kvm_apic_pending_eoi(vcpu, entry->fields.vector))
+                        continue;
+
+                /*
+                 * If no longer has pending EOI in LAPICs, update
+                 * EOI for this vector.
+                 */
+                rtc_irq_eoi(ioapic, vcpu, entry->fields.vector);
+                break;
+        }
+}
+
 static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
                           int irq_level, bool line_status)
 {
@@ -191,9 +208,18 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
         }
 
         /*
+         * AMD SVM AVIC accelerate EOI write iff the interrupt is edge
+         * triggered, in which case the in-kernel IOAPIC will not be able
+         * to receive the EOI. In this case, we do a lazy update of the
+         * pending EOI when trying to set IOAPIC irq.
+         */
+        if (edge && kvm_apicv_activated(ioapic->kvm))
+                ioapic_lazy_update_eoi(ioapic, irq);
+
+        /*
          * Return 0 for coalesced interrupts; for edge-triggered interrupts,
          * this only happens if a previous edge has not been delivered due
-         * do masking. For level interrupts, the remote_irr field tells
+         * to masking. For level interrupts, the remote_irr field tells
          * us if the interrupt is waiting for an EOI.
          *
         * RTC is special: it is edge-triggered, but userspace likes to know
@@ -255,11 +281,10 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
                 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
                     kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
                     index == RTC_GSI) {
-                        if (kvm_apic_match_dest(vcpu, NULL, 0,
-                                e->fields.dest_id, e->fields.dest_mode) ||
-                            kvm_apic_pending_eoi(vcpu, e->fields.vector))
-                                __set_bit(e->fields.vector,
-                                          ioapic_handled_vectors);
+                        u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
+
+                        kvm_scan_ioapic_irq(vcpu, e->fields.dest_id, dm,
+                                            e->fields.vector, ioapic_handled_vectors);
                 }
         }
         spin_unlock(&ioapic->lock);
@@ -272,12 +297,49 @@ void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
         kvm_make_scan_ioapic_request(kvm);
 }
 
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+                                    struct kvm_irq_mask_notifier *kimn)
+{
+        struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+        mutex_lock(&kvm->irq_lock);
+        kimn->irq = irq;
+        hlist_add_head_rcu(&kimn->link, &ioapic->mask_notifier_list);
+        mutex_unlock(&kvm->irq_lock);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+                                      struct kvm_irq_mask_notifier *kimn)
+{
+        mutex_lock(&kvm->irq_lock);
+        hlist_del_rcu(&kimn->link);
+        mutex_unlock(&kvm->irq_lock);
+        synchronize_srcu(&kvm->irq_srcu);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+                             bool mask)
+{
+        struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+        struct kvm_irq_mask_notifier *kimn;
+        int idx, gsi;
+
+        idx = srcu_read_lock(&kvm->irq_srcu);
+        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+        if (gsi != -1)
+                hlist_for_each_entry_rcu(kimn, &ioapic->mask_notifier_list, link)
+                        if (kimn->irq == gsi)
+                                kimn->func(kimn, mask);
+        srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
         unsigned index;
         bool mask_before, mask_after;
-        int old_remote_irr, old_delivery_status;
         union kvm_ioapic_redirect_entry *e;
+        int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode;
+        DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
 
         switch (ioapic->ioregsel) {
         case IOAPIC_REG_VERSION:
@@ -294,14 +356,16 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 
         default:
                 index = (ioapic->ioregsel - 0x10) >> 1;
-                ioapic_debug("change redir index %x val %x\n", index, val);
                 if (index >= IOAPIC_NUM_PINS)
                         return;
+                index = array_index_nospec(index, IOAPIC_NUM_PINS);
                 e = &ioapic->redirtbl[index];
                 mask_before = e->fields.mask;
                 /* Preserve read-only fields */
                 old_remote_irr = e->fields.remote_irr;
                 old_delivery_status = e->fields.delivery_status;
+                old_dest_id = e->fields.dest_id;
+                old_dest_mode = e->fields.dest_mode;
                 if (ioapic->ioregsel & 1) {
                         e->bits &= 0xffffffff;
                         e->bits |= (u64) val << 32;
@@ -324,10 +388,73 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                 mask_after = e->fields.mask;
                 if (mask_before != mask_after)
                         kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
-                if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
-                    && ioapic->irr & (1 << index))
-                        ioapic_service(ioapic, index, false);
-                kvm_make_scan_ioapic_request(ioapic->kvm);
+                if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+                    ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) {
+                        /*
+                         * Pending status in irr may be outdated: the IRQ line may have
+                         * already been deasserted by a device while the IRQ was masked.
+                         * This occurs, for instance, if the interrupt is handled in a
+                         * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this
+                         * case the guest acknowledges the interrupt to the device in
+                         * its threaded irq handler, i.e. after the EOI but before
+                         * unmasking, so at the time of unmasking the IRQ line is
+                         * already down but our pending irr bit is still set. In such
+                         * cases, injecting this pending interrupt to the guest is
+                         * buggy: the guest will receive an extra unwanted interrupt.
+                         *
+                         * So we need to check here if the IRQ is actually still pending.
+                         * As we are generally not able to probe the IRQ line status
+                         * directly, we do it through irqfd resampler. Namely, we clear
+                         * the pending status and notify the resampler that this interrupt
+                         * is done, without actually injecting it into the guest. If the
+                         * IRQ line is actually already deasserted, we are done. If it is
+                         * still asserted, a new interrupt will be shortly triggered
+                         * through irqfd and injected into the guest.
+                         *
+                         * If, however, it's not possible to resample (no irqfd resampler
+                         * registered for this irq), then unconditionally inject this
+                         * pending interrupt into the guest, so the guest will not miss
+                         * an interrupt, although may get an extra unwanted interrupt.
+                         */
+                        if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index))
+                                ioapic->irr &= ~(1 << index);
+                        else
+                                ioapic_service(ioapic, index, false);
+                }
+                if (e->fields.delivery_mode == APIC_DM_FIXED) {
+                        struct kvm_lapic_irq irq;
+
+                        irq.vector = e->fields.vector;
+                        irq.delivery_mode = e->fields.delivery_mode << 8;
+                        irq.dest_mode =
+                            kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
+                        irq.level = false;
+                        irq.trig_mode = e->fields.trig_mode;
+                        irq.shorthand = APIC_DEST_NOSHORT;
+                        irq.dest_id = e->fields.dest_id;
+                        irq.msi_redir_hint = false;
+                        bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+                        kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+                                                 vcpu_bitmap);
+                        if (old_dest_mode != e->fields.dest_mode ||
+                            old_dest_id != e->fields.dest_id) {
+                                /*
+                                 * Update vcpu_bitmap with vcpus specified in
+                                 * the previous request as well. This is done to
+                                 * keep ioapic_handled_vectors synchronized.
+                                 */
+                                irq.dest_id = old_dest_id;
+                                irq.dest_mode =
+                                    kvm_lapic_irq_dest_mode(
+                                        !!e->fields.dest_mode);
+                                kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq,
+                                                         vcpu_bitmap);
+                        }
+                        kvm_make_scan_ioapic_request_mask(ioapic->kvm,
+                                                          vcpu_bitmap);
+                } else {
+                        kvm_make_scan_ioapic_request(ioapic->kvm);
+                }
                 break;
         }
 }
@@ -343,19 +470,13 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
             entry->fields.remote_irr))
                 return -1;
 
-        ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-                     "vector=%x trig_mode=%x\n",
-                     entry->fields.dest_id, entry->fields.dest_mode,
-                     entry->fields.delivery_mode, entry->fields.vector,
-                     entry->fields.trig_mode);
-
         irqe.dest_id = entry->fields.dest_id;
         irqe.vector = entry->fields.vector;
-        irqe.dest_mode = entry->fields.dest_mode;
+        irqe.dest_mode = kvm_lapic_irq_dest_mode(!!entry->fields.dest_mode);
         irqe.trig_mode = entry->fields.trig_mode;
         irqe.delivery_mode = entry->fields.delivery_mode << 8;
         irqe.level = 1;
-        irqe.shorthand = 0;
+        irqe.shorthand = APIC_DEST_NOSHORT;
         irqe.msi_redir_hint = false;
 
         if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
@@ -381,9 +502,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
         return ret;
 }
 
-int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-                       int level, bool line_status)
+int kvm_ioapic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+                       int irq_source_id, int level, bool line_status)
 {
+        struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+        int irq = e->irqchip.pin;
         int ret, irq_level;
 
         BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
@@ -398,16 +521,6 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
         return ret;
 }
 
-void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
-{
-        int i;
-
-        spin_lock(&ioapic->lock);
-        for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
-                __clear_bit(irq_source_id, &ioapic->irq_states[i]);
-        spin_unlock(&ioapic->lock);
-}
-
 static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
 {
         int i;
@@ -427,72 +540,68 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
 }
 
 #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
-
-static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
-                        struct kvm_ioapic *ioapic, int vector, int trigger_mode)
+static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu,
+                                      struct kvm_ioapic *ioapic,
+                                      int trigger_mode,
+                                      int pin)
 {
-        struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
         struct kvm_lapic *apic = vcpu->arch.apic;
-        int i;
+        union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[pin];
 
-        /* RTC special handling */
-        if (test_bit(vcpu->vcpu_id, dest_map->map) &&
-            vector == dest_map->vectors[vcpu->vcpu_id])
-                rtc_irq_eoi(ioapic, vcpu);
-
-        for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-                union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
-
-                if (ent->fields.vector != vector)
-                        continue;
-
-                /*
-                 * We are dropping lock while calling ack notifiers because ack
-                 * notifier callbacks for assigned devices call into IOAPIC
-                 * recursively. Since remote_irr is cleared only after call
-                 * to notifiers if the same vector will be delivered while lock
-                 * is dropped it will be put into irr and will be delivered
-                 * after ack notifier returns.
-                 */
-                spin_unlock(&ioapic->lock);
-                kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
-                spin_lock(&ioapic->lock);
+        /*
+         * We are dropping lock while calling ack notifiers because ack
+         * notifier callbacks for assigned devices call into IOAPIC
+         * recursively. Since remote_irr is cleared only after call
+         * to notifiers if the same vector will be delivered while lock
+         * is dropped it will be put into irr and will be delivered
+         * after ack notifier returns.
+         */
+        spin_unlock(&ioapic->lock);
+        kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
+        spin_lock(&ioapic->lock);
 
-                if (trigger_mode != IOAPIC_LEVEL_TRIG ||
-                    kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
-                        continue;
+        if (trigger_mode != IOAPIC_LEVEL_TRIG ||
+            kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
                return;
 
-                ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
-                ent->fields.remote_irr = 0;
-                if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
-                        ++ioapic->irq_eoi[i];
-                        if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
-                                /*
-                                 * Real hardware does not deliver the interrupt
-                                 * immediately during eoi broadcast, and this
-                                 * lets a buggy guest make slow progress
-                                 * even if it does not correctly handle a
-                                 * level-triggered interrupt. Emulate this
-                                 * behavior if we detect an interrupt storm.
-                                 */
-                                schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
-                                ioapic->irq_eoi[i] = 0;
-                                trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
-                        } else {
-                                ioapic_service(ioapic, i, false);
-                        }
+        ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+        ent->fields.remote_irr = 0;
+        if (!ent->fields.mask && (ioapic->irr & (1 << pin))) {
+                ++ioapic->irq_eoi[pin];
+                if (ioapic->irq_eoi[pin] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
+                        /*
+                         * Real hardware does not deliver the interrupt
+                         * immediately during eoi broadcast, and this
+                         * lets a buggy guest make slow progress
+                         * even if it does not correctly handle a
+                         * level-triggered interrupt. Emulate this
+                         * behavior if we detect an interrupt storm.
+                         */
+                        schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
+                        ioapic->irq_eoi[pin] = 0;
+                        trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
                 } else {
-                        ioapic->irq_eoi[i] = 0;
+                        ioapic_service(ioapic, pin, false);
                 }
+        } else {
+                ioapic->irq_eoi[pin] = 0;
         }
 }
 
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
+        int i;
         struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 
         spin_lock(&ioapic->lock);
-        __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
+        rtc_irq_eoi(ioapic, vcpu, vector);
+        for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+                union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+                if (ent->fields.vector != vector)
+                        continue;
+                kvm_ioapic_update_eoi_one(vcpu, ioapic, trigger_mode, i);
+        }
         spin_unlock(&ioapic->lock);
 }
 
@@ -515,7 +624,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
         if (!ioapic_in_range(ioapic, addr))
                 return -EOPNOTSUPP;
 
-        ioapic_debug("addr %lx\n", (unsigned long)addr);
         ASSERT(!(addr & 0xf));  /* check alignment */
 
         addr &= 0xff;
@@ -526,7 +634,7 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                 break;
 
         case IOAPIC_REG_WINDOW:
-                result = ioapic_read_indirect(ioapic, addr, len);
+                result = ioapic_read_indirect(ioapic);
                 break;
 
         default:
@@ -558,8 +666,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
         if (!ioapic_in_range(ioapic, addr))
                 return -EOPNOTSUPP;
 
-        ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
-                     (void*)addr, len, val);
         ASSERT(!(addr & 0xf));  /* check alignment */
 
         switch (len) {
@@ -622,11 +728,12 @@ int kvm_ioapic_init(struct kvm *kvm)
         struct kvm_ioapic *ioapic;
         int ret;
 
-        ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+        ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT);
         if (!ioapic)
                 return -ENOMEM;
         spin_lock_init(&ioapic->lock);
         INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
+        INIT_HLIST_HEAD(&ioapic->mask_notifier_list);
         kvm->arch.vioapic = ioapic;
         kvm_ioapic_reset(ioapic);
         kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
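
A note on the two array_index_nospec() hunks above: ioapic_read_indirect() and ioapic_write_indirect() both derive a redirection-table index from the guest-controlled IOREGSEL register, so the bounds check alone is not enough under Spectre v1; a mispredicted branch could still index out of bounds on the speculative path. The sketch below restates the check-then-clamp pattern in isolation; the table argument and the redirtbl_lookup() name are illustrative, not from this file.

    #include <linux/nospec.h>
    #include <linux/types.h>

    /*
     * Hypothetical helper mirroring ioapic_read_indirect(): the bounds
     * check can be speculated past, but array_index_nospec() clamps the
     * index to 0 on the misspeculated path, so no out-of-bounds load
     * feeds a side channel.
     */
    static u64 redirtbl_lookup(const u64 *table, u32 index, u32 size)
    {
            u64 val = ~0ULL;        /* out-of-range registers read as all-ones */

            if (index < size) {
                    index = array_index_nospec(index, size);
                    val = table[index];
            }
            return val;
    }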
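The mask-notifier hunks give in-kernel users a callback whenever the guest toggles a pin's mask bit: registration and unregistration serialize on kvm->irq_lock, while kvm_fire_mask_notifiers() walks the list under irq_srcu, which is why unregistration finishes with synchronize_srcu() before the caller may free the notifier. A hedged sketch of a hypothetical consumer follows; struct my_assigned_dev and its fields are invented for illustration, and only the kvm_irq_mask_notifier usage (the func callback and irq field) comes from the hunk above.

    /* Hypothetical device that pauses host-side forwarding while masked. */
    struct my_assigned_dev {
            struct kvm *kvm;
            bool host_irq_enabled;
            struct kvm_irq_mask_notifier mask_notifier;
    };

    static void my_dev_mask_notify(struct kvm_irq_mask_notifier *kimn, bool masked)
    {
            struct my_assigned_dev *dev =
                    container_of(kimn, struct my_assigned_dev, mask_notifier);

            /* Stop forwarding host interrupts while the guest masks the pin. */
            dev->host_irq_enabled = !masked;
    }

    static void my_dev_attach(struct my_assigned_dev *dev, int gsi)
    {
            dev->mask_notifier.func = my_dev_mask_notify;
            kvm_register_irq_mask_notifier(dev->kvm, gsi, &dev->mask_notifier);
    }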
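The coalescing contract in ioapic_set_irq() (return 0 when an edge interrupt is dropped because the previous edge was not yet delivered, with the RTC handled specially so the count stays exact) is what userspace observes through the KVM_IRQ_LINE_STATUS ioctl; QEMU relies on this to re-inject lost RTC ticks. A hedged userspace sketch, with VM fd creation and error handling elided:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /*
     * Assert or deassert a GSI and report the delivery status.
     * Returns 1 if the interrupt was delivered, 0 if it was coalesced,
     * -1 on ioctl failure.  The status field shares a union with irq
     * in struct kvm_irq_level.
     */
    static int set_gsi_level(int vmfd, unsigned int gsi, unsigned int level)
    {
            struct kvm_irq_level irq = { .irq = gsi, .level = level };

            if (ioctl(vmfd, KVM_IRQ_LINE_STATUS, &irq) < 0)
                    return -1;
            return irq.status;
    }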
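Finally, the rewritten EOI path keeps the interrupt-storm guard: after IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT back-to-back EOIs with the pin still pending, kvm_ioapic_update_eoi_one() defers re-injection to delayed work (HZ / 100, roughly 10 ms) instead of re-serving inline, mimicking real hardware's delivery latency so a guest that mishandles a level-triggered interrupt still makes forward progress. The general shape of that throttle as a standalone hedged sketch; everything except the workqueue calls and HZ is an invented name, and the delayed_work is assumed to have been set up elsewhere with INIT_DELAYED_WORK().

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    #define STORM_MAX_COUNT 10000

    struct throttled_injector {
            struct delayed_work inject_work;  /* re-injects when it fires */
            u32 successive;
    };

    /* Returns true when injection was deferred instead of done inline. */
    static bool storm_throttle(struct throttled_injector *ti)
    {
            if (++ti->successive < STORM_MAX_COUNT)
                    return false;

            ti->successive = 0;
            /* ~10 ms breather before the worker re-injects. */
            schedule_delayed_work(&ti->inject_work, HZ / 100);
            return true;
    }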
