summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_xive.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_xive.c')
-rw-r--r--arch/powerpc/kvm/book3s_xive.c1022
1 files changed, 867 insertions, 155 deletions
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 66858b7d3c6b..29a382249770 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/uaccess.h>
+#include <linux/irqdomain.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
@@ -21,7 +22,6 @@
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
-#include <asm/debugfs.h>
#include <asm/time.h>
#include <asm/opal.h>
@@ -30,27 +30,629 @@
#include "book3s_xive.h"
-
-/*
- * Virtual mode variants of the hcalls for use on radix/radix
- * with AIL. They require the VCPU's VP to be "pushed"
- *
- * We still instantiate them here because we use some of the
- * generated utility functions as well in this file.
- */
-#define XIVE_RUNTIME_CHECKS
-#define X_PFX xive_vm_
-#define X_STATIC static
-#define X_STAT_PFX stat_vm_
-#define __x_tima xive_tima
#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
-#define __x_writeb __raw_writeb
-#define __x_readw __raw_readw
-#define __x_readq __raw_readq
-#define __x_writeq __raw_writeq
-#include "book3s_xive_template.c"
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY 1
+
+static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
+{
+ u8 cppr;
+ u16 ack;
+
+ /*
+ * Ensure any previous store to CPPR is ordered vs.
+ * the subsequent loads from PIPR or ACK.
+ */
+ eieio();
+
+ /* Perform the acknowledge OS to register cycle. */
+ ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+ /* Synchronize subsequent queue accesses */
+ mb();
+
+ /* XXX Check grouping level */
+
+ /* Anything ? */
+ if (!((ack >> 8) & TM_QW1_NSR_EO))
+ return;
+
+ /* Grab CPPR of the most favored pending interrupt */
+ cppr = ack & 0xff;
+ if (cppr < 8)
+ xc->pending |= 1 << cppr;
+
+ /* Check consistency */
+ if (cppr >= xc->hw_cppr)
+ pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+ smp_processor_id(), cppr, xc->hw_cppr);
+
+ /*
+ * Update our image of the HW CPPR. We don't yet modify
+ * xc->cppr, this will be done as we scan for interrupts
+ * in the queues.
+ */
+ xc->hw_cppr = cppr;
+}
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+ u64 val;
+
+ if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ offset |= XIVE_ESB_LD_ST_MO;
+
+ val = __raw_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+ val >>= 64-8;
+#endif
+ return (u8)val;
+}
+
+
+static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+{
+ /* If the XIVE supports the new "store EOI facility, use it */
+ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ __raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+ else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+ /*
+ * For LSIs the HW EOI cycle is used rather than PQ bits,
+ * as they are automatically re-triggred in HW when still
+ * pending.
+ */
+ __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+ } else {
+ uint64_t eoi_val;
+
+ /*
+ * Otherwise for EOI, we use the special MMIO that does
+ * a clear of both P and Q and returns the old Q,
+ * except for LSIs where we use the "EOI cycle" special
+ * load.
+ *
+ * This allows us to then do a re-trigger if Q was set
+ * rather than synthetizing an interrupt in software
+ */
+ eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
+
+ /* Re-trigger if needed */
+ if ((eoi_val & 1) && __x_trig_page(xd))
+ __raw_writeq(0, __x_trig_page(xd));
+ }
+}
+
+enum {
+ scan_fetch,
+ scan_poll,
+ scan_eoi,
+};
+
+static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
+ u8 pending, int scan_type)
+{
+ u32 hirq = 0;
+ u8 prio = 0xff;
+
+ /* Find highest pending priority */
+ while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+ struct xive_q *q;
+ u32 idx, toggle;
+ __be32 *qpage;
+
+ /*
+ * If pending is 0 this will return 0xff which is what
+ * we want
+ */
+ prio = ffs(pending) - 1;
+
+ /* Don't scan past the guest cppr */
+ if (prio >= xc->cppr || prio > 7) {
+ if (xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ }
+ break;
+ }
+
+ /* Grab queue and pointers */
+ q = &xc->queues[prio];
+ idx = q->idx;
+ toggle = q->toggle;
+
+ /*
+ * Snapshot the queue page. The test further down for EOI
+ * must use the same "copy" that was used by __xive_read_eq
+ * since qpage can be set concurrently and we don't want
+ * to miss an EOI.
+ */
+ qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+ /*
+ * Try to fetch from the queue. Will return 0 for a
+ * non-queueing priority (ie, qpage = 0).
+ */
+ hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+ /*
+ * If this was a signal for an MFFR change done by
+ * H_IPI we skip it. Additionally, if we were fetching
+ * we EOI it now, thus re-enabling reception of a new
+ * such signal.
+ *
+ * We also need to do that if prio is 0 and we had no
+ * page for the queue. In this case, we have non-queued
+ * IPI that needs to be EOId.
+ *
+ * This is safe because if we have another pending MFRR
+ * change that wasn't observed above, the Q bit will have
+ * been set and another occurrence of the IPI will trigger.
+ */
+ if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+ if (scan_type == scan_fetch) {
+ xive_vm_source_eoi(xc->vp_ipi,
+ &xc->vp_ipi_data);
+ q->idx = idx;
+ q->toggle = toggle;
+ }
+ /* Loop back on same queue with updated idx/toggle */
+ WARN_ON(hirq && hirq != XICS_IPI);
+ if (hirq)
+ goto skip_ipi;
+ }
+
+ /* If it's the dummy interrupt, continue searching */
+ if (hirq == XICS_DUMMY)
+ goto skip_ipi;
+
+ /* Clear the pending bit if the queue is now empty */
+ if (!hirq) {
+ pending &= ~(1 << prio);
+
+ /*
+ * Check if the queue count needs adjusting due to
+ * interrupts being moved away.
+ */
+ if (atomic_read(&q->pending_count)) {
+ int p = atomic_xchg(&q->pending_count, 0);
+
+ if (p) {
+ WARN_ON(p > atomic_read(&q->count));
+ atomic_sub(p, &q->count);
+ }
+ }
+ }
+
+ /*
+ * If the most favoured prio we found pending is less
+ * favored (or equal) than a pending IPI, we return
+ * the IPI instead.
+ */
+ if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ break;
+ }
+
+ /* If fetching, update queue pointers */
+ if (scan_type == scan_fetch) {
+ q->idx = idx;
+ q->toggle = toggle;
+ }
+ }
+
+ /* If we are just taking a "peek", do nothing else */
+ if (scan_type == scan_poll)
+ return hirq;
+
+ /* Update the pending bits */
+ xc->pending = pending;
+
+ /*
+ * If this is an EOI that's it, no CPPR adjustment done here,
+ * all we needed was cleanup the stale pending bits and check
+ * if there's anything left.
+ */
+ if (scan_type == scan_eoi)
+ return hirq;
+
+ /*
+ * If we found an interrupt, adjust what the guest CPPR should
+ * be as if we had just fetched that interrupt from HW.
+ *
+ * Note: This can only make xc->cppr smaller as the previous
+ * loop will only exit with hirq != 0 if prio is lower than
+ * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+ * for pending IPIs.
+ */
+ if (hirq)
+ xc->cppr = prio;
+ /*
+ * If it was an IPI the HW CPPR might have been lowered too much
+ * as the HW interrupt we use for IPIs is routed to priority 0.
+ *
+ * We re-sync it here.
+ */
+ if (xc->cppr != xc->hw_cppr) {
+ xc->hw_cppr = xc->cppr;
+ __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+ }
+
+ return hirq;
+}
+
+static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u8 old_cppr;
+ u32 hirq;
+
+ pr_devel("H_XIRR\n");
+
+ xc->stat_vm_h_xirr++;
+
+ /* First collect pending bits from HW */
+ xive_vm_ack_pending(xc);
+
+ pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+ xc->pending, xc->hw_cppr, xc->cppr);
+
+ /* Grab previous CPPR and reverse map it */
+ old_cppr = xive_prio_to_guest(xc->cppr);
+
+ /* Scan for actual interrupts */
+ hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
+
+ pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+ hirq, xc->hw_cppr, xc->cppr);
+
+ /* That should never hit */
+ if (hirq & 0xff000000)
+ pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+
+ /*
+ * XXX We could check if the interrupt is masked here and
+ * filter it. If we chose to do so, we would need to do:
+ *
+ * if (masked) {
+ * lock();
+ * if (masked) {
+ * old_Q = true;
+ * hirq = 0;
+ * }
+ * unlock();
+ * }
+ */
+
+ /* Return interrupt and old CPPR in GPR4 */
+ kvmppc_set_gpr(vcpu, 4, hirq | (old_cppr << 24));
+
+ return H_SUCCESS;
+}
+
+static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u8 pending = xc->pending;
+ u32 hirq;
+
+ pr_devel("H_IPOLL(server=%ld)\n", server);
+
+ xc->stat_vm_h_ipoll++;
+
+ /* Grab the target VCPU if not the current one */
+ if (xc->server_num != server) {
+ vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+ if (!vcpu)
+ return H_PARAMETER;
+ xc = vcpu->arch.xive_vcpu;
+
+ /* Scan all priorities */
+ pending = 0xff;
+ } else {
+ /* Grab pending interrupt if any */
+ __be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
+ u8 pipr = be64_to_cpu(qw1) & 0xff;
+
+ if (pipr < 8)
+ pending |= 1 << pipr;
+ }
+
+ hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
+
+ /* Return interrupt and old CPPR in GPR4 */
+ kvmppc_set_gpr(vcpu, 4, hirq | (xc->cppr << 24));
+
+ return H_SUCCESS;
+}
+
+static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
+{
+ u8 pending, prio;
+
+ pending = xc->pending;
+ if (xc->mfrr != 0xff) {
+ if (xc->mfrr < 8)
+ pending |= 1 << xc->mfrr;
+ else
+ pending |= 0x80;
+ }
+ if (!pending)
+ return;
+ prio = ffs(pending) - 1;
+
+ __raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
+}
+
+static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
+ struct kvmppc_xive_vcpu *xc)
+{
+ unsigned int prio;
+
+ /* For each priority that is now masked */
+ for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ struct xive_q *q = &xc->queues[prio];
+ struct kvmppc_xive_irq_state *state;
+ struct kvmppc_xive_src_block *sb;
+ u32 idx, toggle, entry, irq, hw_num;
+ struct xive_irq_data *xd;
+ __be32 *qpage;
+ u16 src;
+
+ idx = q->idx;
+ toggle = q->toggle;
+ qpage = READ_ONCE(q->qpage);
+ if (!qpage)
+ continue;
+
+ /* For each interrupt in the queue */
+ for (;;) {
+ entry = be32_to_cpup(qpage + idx);
+
+ /* No more ? */
+ if ((entry >> 31) == toggle)
+ break;
+ irq = entry & 0x7fffffff;
+
+ /* Skip dummies and IPIs */
+ if (irq == XICS_DUMMY || irq == XICS_IPI)
+ goto next;
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ goto next;
+ state = &sb->irq_state[src];
+
+ /* Has it been rerouted ? */
+ if (xc->server_num == state->act_server)
+ goto next;
+
+ /*
+ * Allright, it *has* been re-routed, kill it from
+ * the queue.
+ */
+ qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+ /* Find the HW interrupt */
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ /* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+ if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+ /* EOI the source */
+ xive_vm_source_eoi(hw_num, xd);
+
+next:
+ idx = (idx + 1) & q->msk;
+ if (idx == 0)
+ toggle ^= 1;
+ }
+ }
+}
+
+static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+ u8 old_cppr;
+
+ pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+ xc->stat_vm_h_cppr++;
+
+ /* Map CPPR */
+ cppr = xive_prio_from_guest(cppr);
+
+ /* Remember old and update SW state */
+ old_cppr = xc->cppr;
+ xc->cppr = cppr;
+
+ /*
+ * Order the above update of xc->cppr with the subsequent
+ * read of xc->mfrr inside push_pending_to_hw()
+ */
+ smp_mb();
+
+ if (cppr > old_cppr) {
+ /*
+ * We are masking less, we need to look for pending things
+ * to deliver and set VP pending bits accordingly to trigger
+ * a new interrupt otherwise we might miss MFRR changes for
+ * which we have optimized out sending an IPI signal.
+ */
+ xive_vm_push_pending_to_hw(xc);
+ } else {
+ /*
+ * We are masking more, we need to check the queue for any
+ * interrupt that has been routed to another CPU, take
+ * it out (replace it with the dummy) and retrigger it.
+ *
+ * This is necessary since those interrupts may otherwise
+ * never be processed, at least not until this CPU restores
+ * its CPPR.
+ *
+ * This is in theory racy vs. HW adding new interrupts to
+ * the queue. In practice this works because the interesting
+ * cases are when the guest has done a set_xive() to move the
+ * interrupt away, which flushes the xive, followed by the
+ * target CPU doing a H_CPPR. So any new interrupt coming into
+ * the queue must still be routed to us and isn't a source
+ * of concern.
+ */
+ xive_vm_scan_for_rerouted_irqs(xive, xc);
+ }
+
+ /* Apply new CPPR */
+ xc->hw_cppr = cppr;
+ __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+ return H_SUCCESS;
+}
+
+static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct xive_irq_data *xd;
+ u8 new_cppr = xirr >> 24;
+ u32 irq = xirr & 0x00ffffff, hw_num;
+ u16 src;
+ int rc = 0;
+
+ pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+ xc->stat_vm_h_eoi++;
+
+ xc->cppr = xive_prio_from_guest(new_cppr);
+
+ /*
+ * IPIs are synthetized from MFRR and thus don't need
+ * any special EOI handling. The underlying interrupt
+ * used to signal MFRR changes is EOId when fetched from
+ * the queue.
+ */
+ if (irq == XICS_IPI || irq == 0) {
+ /*
+ * This barrier orders the setting of xc->cppr vs.
+ * subsequent test of xc->mfrr done inside
+ * scan_interrupts and push_pending_to_hw
+ */
+ smp_mb();
+ goto bail;
+ }
+
+ /* Find interrupt source */
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb) {
+ pr_devel(" source not found !\n");
+ rc = H_PARAMETER;
+ /* Same as above */
+ smp_mb();
+ goto bail;
+ }
+ state = &sb->irq_state[src];
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ state->in_eoi = true;
+
+ /*
+ * This barrier orders both setting of in_eoi above vs,
+ * subsequent test of guest_priority, and the setting
+ * of xc->cppr vs. subsequent test of xc->mfrr done inside
+ * scan_interrupts and push_pending_to_hw
+ */
+ smp_mb();
+
+again:
+ if (state->guest_priority == MASKED) {
+ arch_spin_lock(&sb->lock);
+ if (state->guest_priority != MASKED) {
+ arch_spin_unlock(&sb->lock);
+ goto again;
+ }
+ pr_devel(" EOI on saved P...\n");
+
+ /* Clear old_p, that will cause unmask to perform an EOI */
+ state->old_p = false;
+
+ arch_spin_unlock(&sb->lock);
+ } else {
+ pr_devel(" EOI on source...\n");
+
+ /* Perform EOI on the source */
+ xive_vm_source_eoi(hw_num, xd);
+
+ /* If it's an emulated LSI, check level and resend */
+ if (state->lsi && state->asserted)
+ __raw_writeq(0, __x_trig_page(xd));
+
+ }
+
+ /*
+ * This barrier orders the above guest_priority check
+ * and spin_lock/unlock with clearing in_eoi below.
+ *
+ * It also has to be a full mb() as it must ensure
+ * the MMIOs done in source_eoi() are completed before
+ * state->in_eoi is visible.
+ */
+ mb();
+ state->in_eoi = false;
+bail:
+
+ /* Re-evaluate pending IRQs and update HW */
+ xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
+ xive_vm_push_pending_to_hw(xc);
+ pr_devel(" after scan pending=%02x\n", xc->pending);
+
+ /* Apply new CPPR */
+ xc->hw_cppr = xc->cppr;
+ __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+ return rc;
+}
+
+static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+ xc->stat_vm_h_ipi++;
+
+ /* Find target */
+ vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+ if (!vcpu)
+ return H_PARAMETER;
+ xc = vcpu->arch.xive_vcpu;
+
+ /* Locklessly write over MFRR */
+ xc->mfrr = mfrr;
+
+ /*
+ * The load of xc->cppr below and the subsequent MMIO store
+ * to the IPI must happen after the above mfrr update is
+ * globally visible so that:
+ *
+ * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+ * updating xc->cppr then reading xc->mfrr.
+ *
+ * - The target of the IPI sees the xc->mfrr update
+ */
+ mb();
+
+ /* Shoot the IPI if most favored than target cppr */
+ if (mfrr < xc->cppr)
+ __raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+ return H_SUCCESS;
+}
/*
* We leave a gap of a couple of interrupts in the queue to
@@ -58,6 +660,25 @@
*/
#define XIVE_Q_GAP 2
+static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ /* Check enablement at VP level */
+ return xc->vp_cam & TM_QW1W2_HO;
+}
+
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = xc->xive;
+
+ if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE)
+ return kvmppc_xive_vcpu_has_save_restore(vcpu);
+
+ return true;
+}
+
/*
* Push a vcpu's context to the XIVE on guest entry.
* This assumes we are in virtual mode (MMU on)
@@ -76,7 +697,8 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
return;
eieio();
- __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+ if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+ __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
@@ -104,7 +726,7 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
* interrupt might have fired and be on its way to the
* host queue while we mask it, and if we unmask it
* early enough (re-cede right away), there is a
- * theorical possibility that it fires again, thus
+ * theoretical possibility that it fires again, thus
* landing in the target queue more than once which is
* a big no-no.
*
@@ -128,6 +750,75 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+ if (!vcpu->arch.xive_pushed)
+ return;
+
+ /*
+ * Should not have been pushed if there is no tima
+ */
+ if (WARN_ON(!tima))
+ return;
+
+ eieio();
+ /* First load to pull the context, we ignore the value */
+ __raw_readl(tima + TM_SPC_PULL_OS_CTX);
+ /* Second load to recover the context state (Words 0 and 1) */
+ if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+ vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+ /* Fixup some of the state for the next load */
+ vcpu->arch.xive_saved_state.lsmfb = 0;
+ vcpu->arch.xive_saved_state.ack = 0xff;
+ vcpu->arch.xive_pushed = 0;
+ eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+ void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+ bool ret = true;
+
+ if (!esc_vaddr)
+ return ret;
+
+ /* we are using XIVE with single escalation */
+
+ if (vcpu->arch.xive_esc_on) {
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
+ ret = false;
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+ } else {
+ vcpu->arch.xive_esc_on = true;
+ mb();
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+ }
+ mb();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
+/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
*/
@@ -152,7 +843,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
vcpu->arch.irq_pending = 1;
smp_mb();
- if (vcpu->arch.ceded)
+ if (vcpu->arch.ceded || vcpu->arch.nested)
kvmppc_fast_vcpu_kick(vcpu);
/* Since we have the no-EOI flag, the interrupt is effectively
@@ -193,10 +884,10 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
}
if (single_escalation)
- name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+ name = kasprintf(GFP_KERNEL, "kvm-%lld-%d",
vcpu->kvm->arch.lpid, xc->server_num);
else
- name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+ name = kasprintf(GFP_KERNEL, "kvm-%lld-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
@@ -219,7 +910,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
/* In single escalation mode, we grab the ESB MMIO of the
* interrupt and mask it. Also populate the VCPU v/raddr
* of the ESB page for use by asm entry/exit code. Finally
- * set the XIVE_IRQ_NO_EOI flag which will prevent the
+ * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the
* core code from performing an EOI on the escalation
* interrupt, thus leaving it effectively masked after
* it fires once.
@@ -231,7 +922,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
vcpu->arch.xive_esc_raddr = xd->eoi_page;
vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
- xd->flags |= XIVE_IRQ_NO_EOI;
+ xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
}
return 0;
@@ -282,7 +973,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvm_vcpu *vcpu;
- int i, rc;
+ unsigned long i;
+ int rc;
lockdep_assert_held(&xive->lock);
@@ -297,9 +989,9 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
- if (rc == 0 && !xive->single_escalation)
+ if (rc == 0 && !kvmppc_xive_has_single_escalation(xive))
kvmppc_xive_attach_escalation(vcpu, prio,
- xive->single_escalation);
+ kvmppc_xive_has_single_escalation(xive));
if (rc)
return rc;
}
@@ -353,7 +1045,8 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
{
struct kvm_vcpu *vcpu;
- int i, rc;
+ unsigned long i;
+ int rc;
/* Locate target server */
vcpu = kvmppc_xive_find_server(kvm, *server);
@@ -419,37 +1112,16 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
/* Get the right irq */
kvmppc_xive_select_irq(state, &hw_num, &xd);
- /*
- * If the interrupt is marked as needing masking via
- * firmware, we do it here. Firmware masking however
- * is "lossy", it won't return the old p and q bits
- * and won't set the interrupt to a state where it will
- * record queued ones. If this is an issue we should do
- * lazy masking instead.
- *
- * For now, we work around this in unmask by forcing
- * an interrupt whenever we unmask a non-LSI via FW
- * (if ever).
- */
- if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
- xive_native_configure_irq(hw_num,
- kvmppc_xive_vp(xive, state->act_server),
- MASKED, state->number);
- /* set old_p so we can track if an H_EOI was done */
- state->old_p = true;
- state->old_q = false;
- } else {
- /* Set PQ to 10, return old P and old Q and remember them */
- val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
- state->old_p = !!(val & 2);
- state->old_q = !!(val & 1);
+ /* Set PQ to 10, return old P and old Q and remember them */
+ val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
+ state->old_p = !!(val & 2);
+ state->old_q = !!(val & 1);
- /*
- * Synchronize hardware to sensure the queues are updated
- * when masking
- */
- xive_native_sync_source(hw_num);
- }
+ /*
+ * Synchronize hardware to sensure the queues are updated when
+ * masking
+ */
+ xive_native_sync_source(hw_num);
return old_prio;
}
@@ -483,23 +1155,6 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
/* Get the right irq */
kvmppc_xive_select_irq(state, &hw_num, &xd);
- /*
- * See command in xive_lock_and_mask() concerning masking
- * via firmware.
- */
- if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
- xive_native_configure_irq(hw_num,
- kvmppc_xive_vp(xive, state->act_server),
- state->act_priority, state->number);
- /* If an EOI is needed, do it here */
- if (!state->old_p)
- xive_vm_source_eoi(hw_num, xd);
- /* If this is not an LSI, force a trigger */
- if (!(xd->flags & OPAL_XIVE_IRQ_LSI))
- xive_irq_trigger(xd);
- goto bail;
- }
-
/* Old Q set, set PQ to 11 */
if (state->old_q)
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
@@ -572,7 +1227,7 @@ static int xive_target_interrupt(struct kvm *kvm,
/*
* Targetting rules: In order to avoid losing track of
- * pending interrupts accross mask and unmask, which would
+ * pending interrupts across mask and unmask, which would
* allow queue overflows, we implement the following rules:
*
* - Unless it was never enabled (or we run out of capacity)
@@ -894,13 +1549,13 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
}
int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
- struct irq_desc *host_desc)
+ unsigned long host_irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
- struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
- unsigned int host_irq = irq_desc_get_irq(host_desc);
+ struct irq_data *host_data =
+ irq_domain_get_irq_data(irq_get_default_host(), host_irq);
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
u16 idx;
u8 prio;
@@ -909,7 +1564,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
if (!xive)
return -ENODEV;
- pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq);
+ pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
+ __func__, guest_irq, host_irq, hw_irq);
sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
if (!sb)
@@ -931,7 +1587,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
*/
rc = irq_set_vcpu_affinity(host_irq, state);
if (rc) {
- pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
+ pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
return rc;
}
@@ -991,12 +1647,11 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
- struct irq_desc *host_desc)
+ unsigned long host_irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
- unsigned int host_irq = irq_desc_get_irq(host_desc);
u16 idx;
u8 prio;
int rc;
@@ -1004,7 +1659,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
if (!xive)
return -ENODEV;
- pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
+ pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
if (!sb)
@@ -1023,7 +1678,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/*
* If old_p is set, the interrupt is pending, we switch it to
* PQ=11. This will force a resend in the host so the interrupt
- * isn't lost to whatver host driver may pick it up
+ * isn't lost to whatever host driver may pick it up
*/
if (state->old_p)
xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
@@ -1031,7 +1686,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Release the passed-through interrupt to the host */
rc = irq_set_vcpu_affinity(host_irq, NULL);
if (rc) {
- pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
+ pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
return rc;
}
@@ -1130,8 +1785,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
* stale_p (because it has no easy way to address it). Hence we have
* to adjust stale_p before shutting down the interrupt.
*/
-void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
- struct kvmppc_xive_vcpu *xc, int irq)
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -1171,9 +1825,8 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
if (xc->esc_virq[i]) {
- if (xc->xive->single_escalation)
- xive_cleanup_single_escalation(vcpu, xc,
- xc->esc_virq[i]);
+ if (kvmppc_xive_has_single_escalation(xc->xive))
+ xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
@@ -1214,12 +1867,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
{
/* We have a block of xive->nr_servers VPs. We just need to check
- * raw vCPU ids are below the expected limit for this guest's
- * core stride ; kvmppc_pack_vcpu_id() will pack them down to an
- * index that can be safely used to compute a VP id that belongs
- * to the VP block.
+ * packed vCPU ids are below that.
*/
- return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode;
+ return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
}
int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
@@ -1294,6 +1944,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
+ if (!kvmppc_xive_check_save_restore(vcpu)) {
+ pr_err("inconsistent save-restore setup for VCPU %d\n", cpu);
+ r = -EIO;
+ goto bail;
+ }
+
/* Configure VCPU fields for use by assembly push/pull */
vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
@@ -1315,7 +1971,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
* Enable the VP first as the single escalation mode will
* affect escalation interrupts numbering
*/
- r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
if (r) {
pr_err("Failed to enable VP in OPAL, err %d\n", r);
goto bail;
@@ -1332,15 +1988,15 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct xive_q *q = &xc->queues[i];
/* Single escalation, no queue 7 */
- if (i == 7 && xive->single_escalation)
+ if (i == 7 && kvmppc_xive_has_single_escalation(xive))
break;
/* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
- if (r == 0 && !xive->single_escalation)
+ if (r == 0 && !kvmppc_xive_has_single_escalation(xive))
kvmppc_xive_attach_escalation(
- vcpu, i, xive->single_escalation);
+ vcpu, i, kvmppc_xive_has_single_escalation(xive));
if (r)
goto bail;
} else {
@@ -1355,7 +2011,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
}
/* If not done above, attach priority 0 escalation */
- r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
+ r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
if (r)
goto bail;
@@ -1468,7 +2124,8 @@ static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
static void xive_pre_save_scan(struct kvmppc_xive *xive)
{
struct kvm_vcpu *vcpu = NULL;
- int i, j;
+ unsigned long i;
+ int j;
/*
* See comment in xive_get_source() about how this
@@ -1649,7 +2306,7 @@ static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
{
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -1733,7 +2390,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
/*
* Now, we select a target if we have one. If we don't we
* leave the interrupt untargetted. It means that an interrupt
- * can become "untargetted" accross migration if it was masked
+ * can become "untargetted" across migration if it was masked
* by set_xive() but there is little we can do about it.
*/
@@ -1854,9 +2511,9 @@ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
return -EINVAL;
if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
- state->asserted = 1;
+ state->asserted = true;
else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
- state->asserted = 0;
+ state->asserted = false;
return 0;
}
@@ -1877,7 +2534,7 @@ int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
- if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+ if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS)
return -EINVAL;
mutex_lock(&xive->lock);
@@ -1986,7 +2643,7 @@ static void kvmppc_xive_release(struct kvm_device *dev)
struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
pr_devel("Releasing xive device\n");
@@ -2110,12 +2767,44 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
*/
xive->nr_servers = KVM_MAX_VCPUS;
- xive->single_escalation = xive_native_has_single_escalation();
+ if (xive_native_has_single_escalation())
+ xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+ if (xive_native_has_save_restore())
+ xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
kvm->arch.xive = xive;
return 0;
}
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ /* The VM should have configured XICS mode before doing XICS hcalls. */
+ if (!kvmppc_xics_enabled(vcpu))
+ return H_TOO_HARD;
+
+ switch (req) {
+ case H_XIRR:
+ return xive_vm_h_xirr(vcpu);
+ case H_CPPR:
+ return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_EOI:
+ return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_IPI:
+ return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ case H_IPOLL:
+ return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_XIRR_X:
+ xive_vm_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 5, get_tb() + kvmppc_get_tb_offset(vcpu));
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2128,9 +2817,8 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
if (!q->qpage && !xc->esc_virq[i])
continue;
- seq_printf(m, " [q%d]: ", i);
-
if (q->qpage) {
+ seq_printf(m, " q[%d]: ", i);
idx = q->idx;
i0 = be32_to_cpup(q->qpage + idx);
idx = (idx + 1) & q->msk;
@@ -2144,16 +2832,54 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
irq_data_get_irq_handler_data(d);
u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
- seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
- (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
- (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
- xc->esc_virq[i], pq, xd->eoi_page);
+ seq_printf(m, " ESC %d %c%c EOI @%llx",
+ xc->esc_virq[i],
+ (pq & XIVE_ESB_VAL_P) ? 'P' : '-',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-',
+ xd->eoi_page);
seq_puts(m, "\n");
}
}
return 0;
}
+void kvmppc_xive_debug_show_sources(struct seq_file *m,
+ struct kvmppc_xive_src_block *sb)
+{
+ int i;
+
+ seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n");
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+ struct xive_irq_data *xd;
+ u64 pq;
+ u32 hw_num;
+
+ if (!state->valid)
+ continue;
+
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+ seq_printf(m, "%08x %08x/%02x", state->number, hw_num,
+ xd->src_chip);
+ if (state->lsi)
+ seq_printf(m, " %cLSI", state->asserted ? '^' : ' ');
+ else
+ seq_puts(m, " MSI");
+
+ seq_printf(m, " %s %c%c %08x % 4d/%d",
+ state->ipi_number == hw_num ? "IPI" : " PT",
+ pq & XIVE_ESB_VAL_P ? 'P' : '-',
+ pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ state->eisn, state->act_server,
+ state->act_priority);
+
+ seq_puts(m, "\n");
+ }
+}
+
static int xive_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;
@@ -2169,12 +2895,12 @@ static int xive_debug_show(struct seq_file *m, void *private)
u64 t_vm_h_cppr = 0;
u64 t_vm_h_eoi = 0;
u64 t_vm_h_ipi = 0;
- unsigned int i;
+ unsigned long i;
if (!kvm)
return 0;
- seq_printf(m, "=========\nVCPU state\n=========\n");
+ seq_puts(m, "=========\nVCPU state\n=========\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2182,11 +2908,12 @@ static int xive_debug_show(struct seq_file *m, void *private)
if (!xc)
continue;
- seq_printf(m, "cpu server %#x VP:%#x CPPR:%#x HWCPPR:%#x"
- " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
- xc->server_num, xc->vp_id, xc->cppr, xc->hw_cppr,
- xc->mfrr, xc->pending,
- xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+ seq_printf(m, "VCPU %d: VP:%#x/%02x\n"
+ " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
+ xc->server_num, xc->vp_id, xc->vp_chip_id,
+ xc->cppr, xc->hw_cppr,
+ xc->mfrr, xc->pending,
+ xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
kvmppc_xive_debug_show_queues(m, vcpu);
@@ -2202,13 +2929,25 @@ static int xive_debug_show(struct seq_file *m, void *private)
t_vm_h_ipi += xc->stat_vm_h_ipi;
}
- seq_printf(m, "Hcalls totals\n");
+ seq_puts(m, "Hcalls totals\n");
seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
+ seq_puts(m, "=========\nSources\n=========\n");
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_debug_show_sources(m, sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
return 0;
}
@@ -2216,24 +2955,15 @@ DEFINE_SHOW_ATTRIBUTE(xive_debug);
static void xive_debugfs_init(struct kvmppc_xive *xive)
{
- char *name;
-
- name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
- if (!name) {
- pr_err("%s: no memory for name\n", __func__);
- return;
- }
-
- xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry,
xive, &xive_debug_fops);
- pr_debug("%s: created %s\n", __func__, name);
- kfree(name);
+ pr_debug("%s: created\n", __func__);
}
static void kvmppc_xive_init(struct kvm_device *dev)
{
- struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
+ struct kvmppc_xive *xive = dev->private;
/* Register some debug interfaces */
xive_debugfs_init(xive);
@@ -2248,21 +2978,3 @@ struct kvm_device_ops kvm_xive_ops = {
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
};
-
-void kvmppc_xive_init_module(void)
-{
- __xive_vm_h_xirr = xive_vm_h_xirr;
- __xive_vm_h_ipoll = xive_vm_h_ipoll;
- __xive_vm_h_ipi = xive_vm_h_ipi;
- __xive_vm_h_cppr = xive_vm_h_cppr;
- __xive_vm_h_eoi = xive_vm_h_eoi;
-}
-
-void kvmppc_xive_exit_module(void)
-{
- __xive_vm_h_xirr = NULL;
- __xive_vm_h_ipoll = NULL;
- __xive_vm_h_ipi = NULL;
- __xive_vm_h_cppr = NULL;
- __xive_vm_h_eoi = NULL;
-}