diff options
Diffstat (limited to 'arch/arm64/kvm/vgic')
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-debug.c | 224 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-init.c | 29 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-its.c | 39 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-kvm-device.c | 29 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v3-nested.c | 406 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v3.c | 46 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic-v4.c | 35 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.c | 38 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic/vgic.h | 39 |
9 files changed, 827 insertions, 58 deletions
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index afb018528bc3..f8425f381de9 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -320,3 +320,227 @@ void vgic_debug_init(struct kvm *kvm) void vgic_debug_destroy(struct kvm *kvm) { } + +/** + * struct vgic_its_iter - Iterator for traversing VGIC ITS device tables. + * @dev: Pointer to the current its_device being processed. + * @ite: Pointer to the current its_ite within the device being processed. + * + * This structure is used to maintain the current position during iteration + * over the ITS device tables. It holds pointers to both the current device + * and the current ITE within that device. + */ +struct vgic_its_iter { + struct its_device *dev; + struct its_ite *ite; +}; + +/** + * end_of_iter - Checks if the iterator has reached the end. + * @iter: The iterator to check. + * + * When the iterator completed processing the final ITE in the last device + * table, it was marked to indicate the end of iteration by setting its + * device and ITE pointers to NULL. + * This function checks whether the iterator was marked as end. + * + * Return: True if the iterator is marked as end, false otherwise. + */ +static inline bool end_of_iter(struct vgic_its_iter *iter) +{ + return !iter->dev && !iter->ite; +} + +/** + * vgic_its_iter_next - Advances the iterator to the next entry in the ITS tables. + * @its: The VGIC ITS structure. + * @iter: The iterator to advance. + * + * This function moves the iterator to the next ITE within the current device, + * or to the first ITE of the next device if the current ITE is the last in + * the device. If the current device is the last device, the iterator is set + * to indicate the end of iteration. 
+ */ +static void vgic_its_iter_next(struct vgic_its *its, struct vgic_its_iter *iter) +{ + struct its_device *dev = iter->dev; + struct its_ite *ite = iter->ite; + + if (!ite || list_is_last(&ite->ite_list, &dev->itt_head)) { + if (list_is_last(&dev->dev_list, &its->device_list)) { + dev = NULL; + ite = NULL; + } else { + dev = list_next_entry(dev, dev_list); + ite = list_first_entry_or_null(&dev->itt_head, + struct its_ite, + ite_list); + } + } else { + ite = list_next_entry(ite, ite_list); + } + + iter->dev = dev; + iter->ite = ite; +} + +/** + * vgic_its_debug_start - Start function for the seq_file interface. + * @s: The seq_file structure. + * @pos: The starting position (offset). + * + * This function initializes the iterator to the beginning of the ITS tables + * and advances it to the specified position. It acquires the its_lock mutex + * to protect shared data. + * + * Return: An iterator pointer on success, NULL if no devices are found or + * the end of the list is reached, or ERR_PTR(-ENOMEM) on memory + * allocation failure. + */ +static void *vgic_its_debug_start(struct seq_file *s, loff_t *pos) +{ + struct vgic_its *its = s->private; + struct vgic_its_iter *iter; + struct its_device *dev; + loff_t offset = *pos; + + mutex_lock(&its->its_lock); + + dev = list_first_entry_or_null(&its->device_list, + struct its_device, dev_list); + if (!dev) + return NULL; + + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return ERR_PTR(-ENOMEM); + + iter->dev = dev; + iter->ite = list_first_entry_or_null(&dev->itt_head, + struct its_ite, ite_list); + + while (!end_of_iter(iter) && offset--) + vgic_its_iter_next(its, iter); + + if (end_of_iter(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +/** + * vgic_its_debug_next - Next function for the seq_file interface. + * @s: The seq_file structure. + * @v: The current iterator. + * @pos: The current position (offset). 
+ * + * This function advances the iterator to the next entry and increments the + * position. + * + * Return: An iterator pointer on success, or NULL if the end of the list is + * reached. + */ +static void *vgic_its_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct vgic_its *its = s->private; + struct vgic_its_iter *iter = v; + + ++*pos; + vgic_its_iter_next(its, iter); + + if (end_of_iter(iter)) { + kfree(iter); + return NULL; + } + return iter; +} + +/** + * vgic_its_debug_stop - Stop function for the seq_file interface. + * @s: The seq_file structure. + * @v: The current iterator. + * + * This function frees the iterator and releases the its_lock mutex. + */ +static void vgic_its_debug_stop(struct seq_file *s, void *v) +{ + struct vgic_its *its = s->private; + struct vgic_its_iter *iter = v; + + if (!IS_ERR_OR_NULL(iter)) + kfree(iter); + mutex_unlock(&its->its_lock); +} + +/** + * vgic_its_debug_show - Show function for the seq_file interface. + * @s: The seq_file structure. + * @v: The current iterator. + * + * This function formats and prints the ITS table entry information to the + * seq_file output. + * + * Return: 0 on success. 
+ */ +static int vgic_its_debug_show(struct seq_file *s, void *v) +{ + struct vgic_its_iter *iter = v; + struct its_device *dev = iter->dev; + struct its_ite *ite = iter->ite; + + /* + * The iterator carries a NULL ITE for a device whose ITT list is + * empty (the start/next helpers use list_first_entry_or_null()). + * There is nothing to print for it, and the list helpers below must + * not be handed a NULL entry. + */ + if (!ite) + return 0; + + /* Emit the per-device header ahead of the device's first ITE only */ + if (list_is_first(&ite->ite_list, &dev->itt_head)) { + seq_printf(s, "\n"); + seq_printf(s, "Device ID: 0x%x, Event ID Range: [0 - %llu]\n", + dev->device_id, BIT_ULL(dev->num_eventid_bits) - 1); + seq_printf(s, "EVENT_ID INTID HWINTID TARGET COL_ID HW\n"); + seq_printf(s, "-----------------------------------------------\n"); + } + + /* Entries lacking an IRQ or a collection are skipped */ + if (ite->irq && ite->collection) { + seq_printf(s, "%8u %8u %8u %8u %8u %2d\n", + ite->event_id, ite->irq->intid, ite->irq->hwintid, + ite->collection->target_addr, + ite->collection->collection_id, ite->irq->hw); + } + + return 0; +} + +static const struct seq_operations vgic_its_debug_sops = { + .start = vgic_its_debug_start, + .next = vgic_its_debug_next, + .stop = vgic_its_debug_stop, + .show = vgic_its_debug_show +}; + +DEFINE_SEQ_ATTRIBUTE(vgic_its_debug); + +/** + * vgic_its_debug_init - Initializes the debugfs interface for VGIC ITS. + * @dev: The KVM device structure. + * + * This function creates a debugfs file named "vgic-its-state@%its_base" + * to expose the ITS table information. + * + * Return: 0 on success. 
+ */ +int vgic_its_debug_init(struct kvm_device *dev) +{ + struct vgic_its *its = dev->private; + char *name; + + name = kasprintf(GFP_KERNEL, "vgic-its-state@%llx", (u64)its->vgic_its_base); + if (!name) + return -ENOMEM; + + debugfs_create_file(name, 0444, dev->kvm->debugfs_dentry, its, &vgic_its_debug_fops); + + kfree(name); + return 0; +} + +void vgic_its_debug_destroy(struct kvm_device *dev) +{ +} diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 775461cf2d2d..1f33e71c2a73 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -198,6 +198,27 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) return 0; } +/* Default GICv3 Maintenance Interrupt INTID, as per SBSA */ +#define DEFAULT_MI_INTID 25 + +int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu) +{ + int ret; + + guard(mutex)(&vcpu->kvm->arch.config_lock); + + /* + * Matching the tradition established with the timers, provide + * a default PPI for the maintenance interrupt. It makes + * things easier to reason about. + */ + if (vcpu->kvm->arch.vgic.mi_intid == 0) + vcpu->kvm->arch.vgic.mi_intid = DEFAULT_MI_INTID; + ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.mi_intid, vcpu); + + return ret; +} + static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -588,12 +609,20 @@ void kvm_vgic_cpu_down(void) static irqreturn_t vgic_maintenance_handler(int irq, void *data) { + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data; + /* * We cannot rely on the vgic maintenance interrupt to be * delivered synchronously. This means we can only use it to * exit the VM, and we perform the handling of EOIed * interrupts on the exit path (see vgic_fold_lr_state). + * + * Of course, NV throws a wrench in this plan, and needs + * something special. 
*/ + if (vcpu && vgic_state_is_nested(vcpu)) + vgic_v3_handle_nested_maint_irq(vcpu); + return IRQ_HANDLED; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index fb96802799c6..569f9da9049f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -154,36 +154,6 @@ out_unlock: return irq; } -struct its_device { - struct list_head dev_list; - - /* the head for the list of ITTEs */ - struct list_head itt_head; - u32 num_eventid_bits; - gpa_t itt_addr; - u32 device_id; -}; - -#define COLLECTION_NOT_MAPPED ((u32)~0) - -struct its_collection { - struct list_head coll_list; - - u32 collection_id; - u32 target_addr; -}; - -#define its_is_collection_mapped(coll) ((coll) && \ - ((coll)->target_addr != COLLECTION_NOT_MAPPED)) - -struct its_ite { - struct list_head ite_list; - - struct vgic_irq *irq; - struct its_collection *collection; - u32 event_id; -}; - /** * struct vgic_its_abi - ITS abi ops and settings * @cte_esz: collection table entry size @@ -1938,6 +1908,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev) mutex_lock(&its->its_lock); + vgic_its_debug_destroy(kvm_dev); + vgic_its_free_device_list(kvm, its); vgic_its_free_collection_list(kvm, its); vgic_its_invalidate_cache(its); @@ -2771,7 +2743,12 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (ret) return ret; - return vgic_register_its_iodev(dev->kvm, its, addr); + ret = vgic_register_its_iodev(dev->kvm, its, addr); + if (ret) + return ret; + + return vgic_its_debug_init(dev); + } case KVM_DEV_ARM_VGIC_GRP_CTRL: return vgic_its_ctrl(dev->kvm, its, attr->attr); diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 5f4f57aaa23e..359094f68c23 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -303,6 +303,12 @@ static int vgic_get_common_attr(struct kvm_device *dev, VGIC_NR_PRIVATE_IRQS, uaddr); break; } + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: { + u32 
__user *uaddr = (u32 __user *)(long)attr->addr; + + r = put_user(dev->kvm->arch.vgic.mi_intid, uaddr); + break; + } } return r; @@ -517,7 +523,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; - bool uaccess; + bool uaccess, post_init = true; u32 val; int ret; @@ -533,6 +539,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, /* Sysregs uaccess is performed by the sysreg handling code */ uaccess = false; break; + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: + post_init = false; + fallthrough; default: uaccess = true; } @@ -552,7 +561,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->arch.config_lock); - if (unlikely(!vgic_initialized(dev->kvm))) { + if (post_init != vgic_initialized(dev->kvm)) { ret = -EBUSY; goto out; } @@ -582,6 +591,19 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, } break; } + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: + if (!is_write) { + val = dev->kvm->arch.vgic.mi_intid; + ret = 0; + break; + } + + ret = -EINVAL; + if ((val < VGIC_NR_PRIVATE_IRQS) && (val >= VGIC_NR_SGIS)) { + dev->kvm->arch.vgic.mi_intid = val; + ret = 0; + } + break; default: ret = -EINVAL; break; @@ -608,6 +630,7 @@ static int vgic_v3_set_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: return vgic_v3_attr_regs_access(dev, attr, true); default: return vgic_set_common_attr(dev, attr); @@ -622,6 +645,7 @@ static int vgic_v3_get_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: return vgic_v3_attr_regs_access(dev, attr, false); default: return vgic_get_common_attr(dev, attr); @@ -645,6 +669,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev, case 
KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: return vgic_v3_has_attr_regs(dev, attr); case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: return 0; case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c new file mode 100644 index 000000000000..4f6954c30674 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/uaccess.h> + +#include <kvm/arm_vgic.h> + +#include <asm/kvm_arm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_nested.h> + +#include "vgic.h" + +#define ICH_LRN(n) (ICH_LR0_EL2 + (n)) +#define ICH_AP0RN(n) (ICH_AP0R0_EL2 + (n)) +#define ICH_AP1RN(n) (ICH_AP1R0_EL2 + (n)) + +struct mi_state { + u16 eisr; + u16 elrsr; + bool pend; +}; + +/* + * The shadow registers loaded to the hardware when running a L2 guest + * with the virtual IMO/FMO bits set. + */ +struct shadow_if { + struct vgic_v3_cpu_if cpuif; + unsigned long lr_map; +}; + +static DEFINE_PER_CPU(struct shadow_if, shadow_if); + +/* + * Nesting GICv3 support + * + * On a non-nesting VM (only running at EL0/EL1), the host hypervisor + * completely controls the interrupts injected via the list registers. + * Consequently, most of the state that is modified by the guest (by ACK-ing + * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we + * keep a semi-consistent view of the interrupts. + * + * This still applies for a NV guest, but only while "InHost" (either + * running at EL2, or at EL0 with HCR_EL2.{E2H.TGE}=={1,1}. 
+ * + * When running a L2 guest ("not InHost"), things are radically different, + * as the L1 guest is in charge of provisioning the interrupts via its own + * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR + * page. This means that the flow described above does work (there is no + * state to rebuild in the L0 hypervisor), and that most things happen on L2 + * load/put: + * + * - on L2 load: move the in-memory L1 vGIC configuration into a shadow, + * per-CPU data structure that is used to populate the actual LRs. This is + * an extra copy that we could avoid, but life is short. In the process, + * we remap any interrupt that has the HW bit set to the mapped interrupt + * on the host, should the host consider it a HW one. This allows the HW + * deactivation to take its course, such as for the timer. + * + * - on L2 put: perform the inverse transformation, so that the result of L2 + * running becomes visible to L1 in the VNCR-accessible registers. + * + * - there is nothing to do on L2 entry, as everything will have happened + * on load. However, this is the point where we detect an interrupt + * targeting L1 and prepare the grand switcheroo. + * + * - on L2 exit: emulate the HW bit, and deactivate the corresponding L1 + * interrupt. The L0 active state will be cleared by the HW if the L1 + * interrupt was itself backed by a HW interrupt. + * + * Maintenance Interrupt (MI) management: + * + * Since the L2 guest runs the vgic in its full glory, MIs get delivered and + * used as a handover point between L2 and L1. + * + * - on delivery of a MI to L0 while L2 is running: make the L1 MI pending, + * and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to + * run and process the MI. + * + * - L1 MI is a fully virtual interrupt, not linked to the host's MI. Its + * state must be computed at each entry/exit of the guest, much like we do + * it for the PMU interrupt. 
+ * + * - because most of the ICH_*_EL2 registers live in the VNCR page, the + * quality of emulation is poor: L1 can setup the vgic so that an MI would + * immediately fire, and not observe anything until the next exit. Trying + * to read ICH_MISR_EL2 would do the trick, for example. + * + * System register emulation: + * + * We get two classes of registers: + * + * - those backed by memory (LRs, APRs, HCR, VMCR): L1 can freely access + * them, and L0 doesn't see a thing. + * + * - those that always trap (ELRSR, EISR, MISR): these are status registers + * that are built on the fly based on the in-memory state. + * + * Only L1 can access the ICH_*_EL2 registers. A non-NV L2 obviously cannot, + * and a NV L2 would either access the VNCR page provided by L1 (memory + * based registers), or see the access redirected to L1 (registers that + * trap) thanks to NV being set by L1. + */ + +bool vgic_state_is_nested(struct kvm_vcpu *vcpu) +{ + u64 xmo; + + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO); + WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO), + "Separate virtual IRQ/FIQ settings not supported\n"); + + return !!xmo; + } + + return false; +} + +static struct shadow_if *get_shadow_if(void) +{ + return this_cpu_ptr(&shadow_if); +} + +static bool lr_triggers_eoi(u64 lr) +{ + return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI); +} + +static void vgic_compute_mi_state(struct kvm_vcpu *vcpu, struct mi_state *mi_state) +{ + u16 eisr = 0, elrsr = 0; + bool pend = false; + + for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { + u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + + if (lr_triggers_eoi(lr)) + eisr |= BIT(i); + if (!(lr & ICH_LR_STATE)) + elrsr |= BIT(i); + pend |= (lr & ICH_LR_PENDING_BIT); + } + + mi_state->eisr = eisr; + mi_state->elrsr = elrsr; + mi_state->pend = pend; +} + +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu) +{ + struct mi_state mi_state; + + vgic_compute_mi_state(vcpu, &mi_state); + 
return mi_state.eisr; +} + +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu) +{ + struct mi_state mi_state; + + vgic_compute_mi_state(vcpu, &mi_state); + return mi_state.elrsr; +} + +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) +{ + struct mi_state mi_state; + u64 reg = 0, hcr, vmcr; + + hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); + vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2); + + vgic_compute_mi_state(vcpu, &mi_state); + + if (mi_state.eisr) + reg |= ICH_MISR_EL2_EOI; + + if (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_UIE) { + int used_lrs = kvm_vgic_global_state.nr_lr; + + used_lrs -= hweight16(mi_state.elrsr); + reg |= (used_lrs <= 1) ? ICH_MISR_EL2_U : 0; + } + + if ((hcr & ICH_HCR_EL2_LRENPIE) && FIELD_GET(ICH_HCR_EL2_EOIcount_MASK, hcr)) + reg |= ICH_MISR_EL2_LRENP; + + if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend) + reg |= ICH_MISR_EL2_NP; + + if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK)) + reg |= ICH_MISR_EL2_VGrp0E; + + if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK)) + reg |= ICH_MISR_EL2_VGrp0D; + + if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK)) + reg |= ICH_MISR_EL2_VGrp1E; + + if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK)) + reg |= ICH_MISR_EL2_VGrp1D; + + return reg; +} + +/* + * For LRs which have HW bit set such as timer interrupts, we modify them to + * have the host hardware interrupt number instead of the virtual one programmed + * by the guest hypervisor. 
+ */ +static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu, + struct vgic_v3_cpu_if *s_cpu_if) +{ + unsigned long lr_map = 0; + int index = 0; + + for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { + u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + struct vgic_irq *irq; + + if (!(lr & ICH_LR_STATE)) + lr = 0; + + if (!(lr & ICH_LR_HW)) + goto next; + + /* We have the HW bit set, check for validity of pINTID */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) { + /* There was no real mapping, so nuke the HW bit */ + lr &= ~ICH_LR_HW; + if (irq) + vgic_put_irq(vcpu->kvm, irq); + goto next; + } + + /* Translate the virtual mapping to the real one */ + lr &= ~ICH_LR_PHYS_ID_MASK; + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + + vgic_put_irq(vcpu->kvm, irq); + +next: + s_cpu_if->vgic_lr[index] = lr; + if (lr) { + lr_map |= BIT(i); + index++; + } + } + + container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map; + s_cpu_if->used_lrs = index; +} + +void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) +{ + struct shadow_if *shadow_if = get_shadow_if(); + int i, index = 0; + + for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { + u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + struct vgic_irq *irq; + + if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) + goto next; + + /* + * If we had a HW lr programmed by the guest hypervisor, we + * need to emulate the HW effect between the guest hypervisor + * and the nested guest. 
+ */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ + goto next; + + lr = __gic_v3_get_lr(index); + if (!(lr & ICH_LR_STATE)) + irq->active = false; + + vgic_put_irq(vcpu->kvm, irq); + next: + index++; + } +} + +static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu, + struct vgic_v3_cpu_if *s_cpu_if) +{ + struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val = 0; + int i; + + /* + * If we're on a system with a broken vgic that requires + * trapping, propagate the trapping requirements. + * + * Ah, the smell of rotten fruits... + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | + ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR); + s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val; + s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2); + s_cpu_if->vgic_sre = host_if->vgic_sre; + + for (i = 0; i < 4; i++) { + s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i)); + s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i)); + } + + vgic_v3_create_shadow_lr(vcpu, s_cpu_if); +} + +void vgic_v3_load_nested(struct kvm_vcpu *vcpu) +{ + struct shadow_if *shadow_if = get_shadow_if(); + struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif; + + BUG_ON(!vgic_state_is_nested(vcpu)); + + vgic_v3_create_shadow_state(vcpu, cpu_if); + + __vgic_v3_restore_vmcr_aprs(cpu_if); + __vgic_v3_activate_traps(cpu_if); + + __vgic_v3_restore_state(cpu_if); + + /* + * Propagate the number of used LRs for the benefit of the HYP + * GICv3 emulation code. Yes, this is a pretty sorry hack. 
+ */ + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = cpu_if->used_lrs; +} + +/* + * Save the hardware vgic state into this CPU's shadow interface, then fold + * what L2 changed (EOIcount, VMCR, APRs and the LR state bits) back into + * the vcpu's ICH_*_EL2 registers. The LR map and used_lrs are reset on exit. + */ +void vgic_v3_put_nested(struct kvm_vcpu *vcpu) +{ + struct shadow_if *shadow_if = get_shadow_if(); + struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif; + u64 val; + int i, index = 0; + + __vgic_v3_save_vmcr_aprs(s_cpu_if); + __vgic_v3_deactivate_traps(s_cpu_if); + __vgic_v3_save_state(s_cpu_if); + + /* + * Translate the shadow state HW fields back to the virtual ones + * before copying the shadow struct back to the nested one. + */ + val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); + val &= ~ICH_HCR_EL2_EOIcount_MASK; + val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); + __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val; + __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr; + + for (i = 0; i < 4; i++) { + __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i]; + __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i]; + } + + /* + * The shadow LRs were packed on load: the n-th bit set in lr_map + * lives in vgic_lr[n], not in vgic_lr[i]. Walk both in lockstep, + * the same way vgic_v3_sync_nested() does. + */ + for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { + val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + + val &= ~ICH_LR_STATE; + val |= s_cpu_if->vgic_lr[index] & ICH_LR_STATE; + + __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val; + s_cpu_if->vgic_lr[index] = 0; + index++; + } + + shadow_if->lr_map = 0; + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; +} + +/* + * If we exit a L2 VM with a pending maintenance interrupt from the GIC, + * then we need to forward this to L1 so that it can re-sync the appropriate + * LRs and sample level triggered interrupts again. 
+ */ +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu) +{ + bool state = read_sysreg_s(SYS_ICH_MISR_EL2); + + /* This will force a switch back to L1 if the level is high */ + kvm_vgic_inject_irq(vcpu->kvm, vcpu, + vcpu->kvm->arch.vgic.mi_intid, state, vcpu); + + sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0); +} + +/* + * Recompute the level of the maintenance interrupt presented to L1 from the + * in-memory ICH_HCR_EL2.En bit and the computed ICH_MISR_EL2 value, and + * inject it on the configured mi_intid. + */ +void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) +{ + bool level; + + /* + * Use a logical AND: a bitwise "bool &= u64" would only keep bit 0 + * of the MISR value and drop every other maintenance condition. + */ + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && + vgic_v3_get_misr(vcpu); + kvm_vgic_inject_irq(vcpu->kvm, vcpu, + vcpu->kvm->arch.vgic.mi_intid, level, vcpu); +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index d7233ab982d0..b9ad7c42c5b0 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -24,7 +24,7 @@ void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - cpuif->vgic_hcr |= ICH_HCR_UIE; + cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; } static bool lr_signals_eoi_mi(u64 lr_val) @@ -42,7 +42,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE; for (lr = 0; lr < cpuif->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; @@ -284,15 +284,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_sre = 0; } - vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_ID_BITS_MASK) >> - ICH_VTR_ID_BITS_SHIFT; - vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_PRI_BITS_MASK) >> - ICH_VTR_PRI_BITS_SHIFT) + 1; + vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits, + kvm_vgic_global_state.ich_vtr_el2); + vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits, + kvm_vgic_global_state.ich_vtr_el2) + 1; /* Get the show on the road... 
*/ - vgic_v3->vgic_hcr = ICH_HCR_EN; + vgic_v3->vgic_hcr = ICH_HCR_EL2_En; } void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) @@ -301,18 +299,19 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) /* Hide GICv3 sysreg if necessary */ if (!kvm_has_gicv3(vcpu->kvm)) { - vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC; + vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | + ICH_HCR_EL2_TC); return; } if (group0_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TALL0; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0; if (group1_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1; if (common_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TC; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC; if (dir_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TDIR; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -632,8 +631,8 @@ static const struct midr_range broken_seis[] = { static bool vgic_v3_broken_seis(void) { - return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && - is_midr_in_range_list(read_cpuid_id(), broken_seis)); + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) && + is_midr_in_range_list(broken_seis)); } /** @@ -706,10 +705,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (vgic_v3_broken_seis()) { kvm_info("GICv3 with broken locally generated SEI\n"); - kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; group0_trap = true; group1_trap = true; - if (ich_vtr_el2 & ICH_VTR_TDS_MASK) + if (ich_vtr_el2 & ICH_VTR_EL2_TDS) dir_trap = true; else common_trap = true; @@ -735,6 +734,12 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + /* If the vgic is nested, perform the full state loading */ + if (vgic_state_is_nested(vcpu)) { + vgic_v3_load_nested(vcpu); + return; + } + if (likely(!is_protected_kvm_enabled())) 
kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); @@ -748,6 +753,11 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + if (vgic_state_is_nested(vcpu)) { + vgic_v3_put_nested(vcpu); + return; + } + if (likely(!is_protected_kvm_enabled())) kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); WARN_ON(vgic_v4_put(vcpu)); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index eedecbbbcf31..c7de6154627c 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -336,6 +336,22 @@ void vgic_v4_teardown(struct kvm *kvm) its_vm->vpes = NULL; } +static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu) +{ + if (vcpu_get_flag(vcpu, IN_WFI)) + return true; + + if (likely(!vcpu_has_nv(vcpu))) + return false; + + /* + * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the + * L1 context) nonresident and request a doorbell to kick us out of the + * L2 when an IRQ becomes pending. + */ + return vcpu_get_flag(vcpu, IN_NESTED_ERET); +} + int vgic_v4_put(struct kvm_vcpu *vcpu) { struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; @@ -343,7 +359,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu) if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) return 0; - return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI)); + return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu)); } int vgic_v4_load(struct kvm_vcpu *vcpu) @@ -415,7 +431,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, struct vgic_irq *irq; struct its_vlpi_map map; unsigned long flags; - int ret; + int ret = 0; if (!vgic_supports_direct_msis(kvm)) return 0; @@ -430,10 +446,15 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, mutex_lock(&its->its_lock); - /* Perform the actual DevID/EventID -> LPI translation. 
*/ - ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, - irq_entry->msi.data, &irq); - if (ret) + /* + * Perform the actual DevID/EventID -> LPI translation. + * + * Silently exit if translation fails as the guest (or userspace!) has + * managed to do something stupid. Emulated LPI injection will still + * work if the guest figures itself out at a later time. + */ + if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq)) goto out; /* Silently exit if the vLPI is already mapped */ @@ -512,7 +533,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, if (ret) goto out; - WARN_ON(!(irq->hw && irq->host_irq == virq)); + WARN_ON(irq->hw && irq->host_irq != virq); if (irq->hw) { atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); irq->hw = false; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index cc8c6b9b5dd8..8f8096d48925 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -872,6 +872,15 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { int used_lrs; + /* If nesting, emulate the HW effect from L0 to L1 */ + if (vgic_state_is_nested(vcpu)) { + vgic_v3_sync_nested(vcpu); + return; + } + + if (vcpu_has_nv(vcpu)) + vgic_v3_nested_update_mi(vcpu); + /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; @@ -901,6 +910,35 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { /* + * If in a nested state, we must return early. Two possibilities: + * + * - If we have any pending IRQ for the guest and the guest + * expects IRQs to be handled in its virtual EL2 mode (the + * virtual IMO bit is set) and it is not already running in + * virtual EL2 mode, then we have to emulate an IRQ + * exception to virtual EL2. 
+ * + * We do that by placing a request to ourselves which will + * abort the entry procedure and inject the exception at the + * beginning of the run loop. + * + * - Otherwise, do exactly *NOTHING*. The guest state is + * already loaded, and we can carry on with running it. + * + * If we have NV, but are not in a nested state, compute the + * maintenance interrupt state, as it may fire. + */ + if (vgic_state_is_nested(vcpu)) { + if (kvm_vgic_vcpu_pending_irq(vcpu)) + kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu); + + return; + } + + if (vcpu_has_nv(vcpu)) + vgic_v3_nested_update_mi(vcpu); + + /* * If there are no virtual interrupts active or pending for this * VCPU, then there is no work to do and we can bail out without * taking any lock. There is a potential race with someone injecting diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 122d95b4e284..4349084cb9a6 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -172,6 +172,36 @@ struct vgic_reg_attr { gpa_t addr; }; +struct its_device { + struct list_head dev_list; + + /* the head for the list of ITTEs */ + struct list_head itt_head; + u32 num_eventid_bits; + gpa_t itt_addr; + u32 device_id; +}; + +#define COLLECTION_NOT_MAPPED ((u32)~0) + +struct its_collection { + struct list_head coll_list; + + u32 collection_id; + u32 target_addr; +}; + +#define its_is_collection_mapped(coll) ((coll) && \ + ((coll)->target_addr != COLLECTION_NOT_MAPPED)) + +struct its_ite { + struct list_head ite_list; + + struct vgic_irq *irq; + struct its_collection *collection; + u32 event_id; +}; + int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, struct vgic_reg_attr *reg_attr); int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, @@ -353,4 +383,13 @@ static inline bool kvm_has_gicv3(struct kvm *kvm) return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP); } +void vgic_v3_sync_nested(struct kvm_vcpu *vcpu); +void 
vgic_v3_load_nested(struct kvm_vcpu *vcpu); +void vgic_v3_put_nested(struct kvm_vcpu *vcpu); +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu); +void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu); + +int vgic_its_debug_init(struct kvm_device *dev); +void vgic_its_debug_destroy(struct kvm_device *dev); + #endif |