diff options
Diffstat (limited to 'arch/powerpc/sysdev/xive')
| -rw-r--r-- | arch/powerpc/sysdev/xive/Makefile | 1 | ||||
| -rw-r--r-- | arch/powerpc/sysdev/xive/common.c | 914 | ||||
| -rw-r--r-- | arch/powerpc/sysdev/xive/native.c | 252 | ||||
| -rw-r--r-- | arch/powerpc/sysdev/xive/spapr.c | 258 | ||||
| -rw-r--r-- | arch/powerpc/sysdev/xive/xive-internal.h | 25 |
5 files changed, 1089 insertions, 361 deletions
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile index dea2abc23f4d..e5108883894a 100644 --- a/arch/powerpc/sysdev/xive/Makefile +++ b/arch/powerpc/sysdev/xive/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-y += common.o obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 94a69a62f5db..8d0123b0ae84 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2016,2017 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "xive: " fmt @@ -13,6 +9,7 @@ #include <linux/threads.h> #include <linux/kernel.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/debugfs.h> #include <linux/smp.h> #include <linux/interrupt.h> @@ -23,8 +20,8 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/msi.h> +#include <linux/vmalloc.h> -#include <asm/prom.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/machdep.h> @@ -65,22 +62,37 @@ static const struct xive_ops *xive_ops; static struct irq_domain *xive_irq_domain; #ifdef CONFIG_SMP -/* The IPIs all use the same logical irq number */ -static u32 xive_ipi_irq; +/* The IPIs use the same logical irq number when on the same chip */ +static struct xive_ipi_desc { + unsigned int irq; + char name[16]; + atomic_t started; +} *xive_ipis; + +/* + * Use early_cpu_to_node() for hot-plugged CPUs + */ +static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu) +{ + return xive_ipis[early_cpu_to_node(cpu)].irq; +} #endif /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); +/* An invalid CPU target */ +#define XIVE_INVALID_TARGET (-1) + /* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this + * Global toggle to switch on/off StoreEOI */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) +static bool xive_store_eoi = true; -/* An invalid CPU target */ -#define XIVE_INVALID_TARGET (-1) +static bool xive_is_store_eoi(struct xive_irq_data *xd) +{ + return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi; +} /* * Read the next entry in a queue, return its content if it's valid @@ -139,7 +151,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek) static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) { u32 irq = 0; - u8 prio; + u8 prio = 0; /* Find highest pending priority */ while (xc->pending_prio != 0) { @@ -152,8 +164,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) irq = xive_read_eq(&xc->queue[prio], just_peek); /* Found something ? That's it */ - if (irq) - break; + if (irq) { + if (just_peek || irq_to_desc(irq)) + break; + /* + * We should never get here; if we do then we must + * have failed to synchronize the interrupt properly + * when shutting it down. + */ + pr_crit("xive: got interrupt %d without descriptor, dropping\n", + irq); + WARN_ON(1); + continue; + } /* Clear pending bits */ xc->pending_prio &= ~(1 << prio); @@ -189,15 +212,14 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) /* * This is used to perform the magic loads from an ESB - * described in xive.h + * described in xive-regs.h */ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) { u64 val; - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; + if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd)) + offset |= XIVE_ESB_LD_ST_MO; if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); @@ -209,16 +231,27 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) { - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) xive_ops->esb_rw(xd->hw_irq, offset, data, 1); else out_be64(xd->eoi_mmio + offset, data); } +#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS) +static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size) +{ + u64 val = xive_esb_read(xd, XIVE_ESB_GET); + + snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx", + xive_is_store_eoi(xd) ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-', + xd->trig_page, xd->eoi_page); +} +#endif + #ifdef CONFIG_XMON static notrace void xive_dump_eq(const char *name, struct xive_q *q) { @@ -230,26 +263,82 @@ static notrace void xive_dump_eq(const char *name, struct xive_q *q) i0 = be32_to_cpup(q->qpage + idx); idx = (idx + 1) & q->msk; i1 = be32_to_cpup(q->qpage + idx); - xmon_printf(" %s Q T=%d %08x %08x ...\n", name, - q->toggle, i0, i1); + xmon_printf("%s idx=%d T=%d %08x %08x ...", name, + q->idx, q->toggle, i0, i1); } notrace void xmon_xive_do_dump(int cpu) { struct xive_cpu *xc = per_cpu(xive_cpu, cpu); - xmon_printf("XIVE state for CPU %d:\n", cpu); - xmon_printf(" pp=%02x cppr=%02x\n", xc->pending_prio, xc->cppr); - xive_dump_eq("IRQ", &xc->queue[xive_irq_priority]); + xmon_printf("CPU %d:", cpu); + if (xc) { + xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); + #ifdef CONFIG_SMP - { - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); - xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi, - val & XIVE_ESB_VAL_P ? 'P' : 'p', - val & XIVE_ESB_VAL_Q ? 'Q' : 'q'); - } + { + char buffer[128]; + + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer); + } #endif + xive_dump_eq("EQ", &xc->queue[xive_irq_priority]); + } + xmon_printf("\n"); } + +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? irq_get_irq_data(irq) : NULL; +} + +int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) +{ + int rc; + u32 target; + u8 prio; + u32 lirq; + + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); + if (rc) { + xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); + return rc; + } + + xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", + hw_irq, target, prio, lirq); + + if (!d) + d = xive_get_irq_data(hw_irq); + + if (d) { + char buffer[128]; + + xive_irq_data_dump(irq_data_get_irq_chip_data(d), + buffer, sizeof(buffer)); + xmon_printf("%s", buffer); + } + + xmon_printf("\n"); + return 0; +} + +void xmon_xive_get_irq_all(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); + + if (d) + xmon_xive_get_irq_config(irqd_to_hwirq(d), d); + } +} + #endif /* CONFIG_XMON */ static unsigned int xive_get_irq(void) @@ -294,7 +383,7 @@ static unsigned int xive_get_irq(void) * CPU. * * If we find that there is indeed more in there, we call - * force_external_irq_replay() to make Linux synthetize an + * force_external_irq_replay() to make Linux synthesize an * external interrupt on the next call to local_irq_restore(). */ static void xive_do_queue_eoi(struct xive_cpu *xc) @@ -309,55 +398,46 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) * EOI an interrupt at the source. There are several methods * to do this depending on the HW version and source type */ -static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) +static void xive_do_source_eoi(struct xive_irq_data *xd) { + u8 eoi_val; + + xd->stale_p = false; + /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (xive_is_store_eoi(xd)) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { - /* - * The FW told us to call it. This happens for some - * interrupt sources that need additional HW whacking - * beyond the ESB manipulation. For example LPC interrupts - * on P9 DD1.0 needed a latch to be clared in the LPC bridge - * itself. The Firmware will take care of it. - */ - if (WARN_ON_ONCE(!xive_ops->eoi)) - return; - xive_ops->eoi(hw_irq); - } else { - u8 eoi_val; + return; + } - /* - * Otherwise for EOI, we use the special MMIO that does - * a clear of both P and Q and returns the old Q, - * except for LSIs where we use the "EOI cycle" special - * load. - * - * This allows us to then do a re-trigger if Q was set - * rather than synthesizing an interrupt in software - * - * For LSIs the HW EOI cycle is used rather than PQ bits, - * as they are automatically re-triggred in HW when still - * pending. - */ - if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_esb_read(xd, XIVE_ESB_LOAD_EOI); - else { - eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); - DBG_VERBOSE("eoi_val=%x\n", eoi_val); - - /* Re-trigger if needed */ - if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) - out_be64(xd->trig_mmio, 0); - } + /* + * For LSIs, we use the "EOI cycle" special load rather than + * PQ bits, as they are automatically re-triggered in HW when + * still pending. + */ + if (xd->flags & XIVE_IRQ_FLAG_LSI) { + xive_esb_read(xd, XIVE_ESB_LOAD_EOI); + return; } + + /* + * Otherwise, we use the special MMIO that does a clear of + * both P and Q and returns the old Q. This allows us to then + * do a re-trigger if Q was set rather than synthesizing an + * interrupt in software + */ + eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + DBG_VERBOSE("eoi_val=%x\n", eoi_val); + + /* Re-trigger if needed */ + if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) + out_be64(xd->trig_mmio, 0); } -/* irq_chip eoi callback */ +/* irq_chip eoi callback, called with irq descriptor lock held */ static void xive_irq_eoi(struct irq_data *d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); struct xive_cpu *xc = __this_cpu_read(xive_cpu); DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n", @@ -368,8 +448,10 @@ static void xive_irq_eoi(struct irq_data *d) * been passed-through to a KVM guest */ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && - !(xd->flags & XIVE_IRQ_NO_EOI)) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + !(xd->flags & XIVE_IRQ_FLAG_NO_EOI)) + xive_do_source_eoi(xd); + else + xd->stale_p = true; /* * Clear saved_p to indicate that it's no longer occupying @@ -382,15 +464,15 @@ static void xive_irq_eoi(struct irq_data *d) } /* - * Helper used to mask and unmask an interrupt source. This - * is only called for normal interrupts that do not require - * masking/unmasking via firmware. + * Helper used to mask and unmask an interrupt source. */ static void xive_do_source_set_mask(struct xive_irq_data *xd, bool mask) { u64 val; + pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un"); + /* * If the interrupt had P set, it may be in a queue. * @@ -401,11 +483,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd, */ if (mask) { val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01); - xd->saved_p = !!(val & XIVE_ESB_VAL_P); - } else if (xd->saved_p) + if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P)) + xd->saved_p = true; + xd->stale_p = false; + } else if (xd->saved_p) { xive_esb_read(xd, XIVE_ESB_SET_PQ_10); - else + xd->saved_p = false; + } else { xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + xd->stale_p = false; + } } /* @@ -442,7 +529,7 @@ static void xive_dec_target_count(int cpu) struct xive_cpu *xc = per_cpu(xive_cpu, cpu); struct xive_q *q = &xc->queue[xive_irq_priority]; - if (unlikely(WARN_ON(cpu < 0 || !xc))) { + if (WARN_ON(cpu < 0 || !xc)) { pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc); return; } @@ -483,7 +570,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask, * Now go through the entire mask until we find a valid * target. */ - for (;;) { + do { /* * We re-check online as the fallback case passes us * an untested affinity mask @@ -491,12 +578,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask, if (cpu_online(cpu) && xive_try_pick_target(cpu)) return cpu; cpu = cpumask_next(cpu, mask); - if (cpu == first) - break; /* Wrap around */ if (cpu >= nr_cpu_ids) cpu = cpumask_first(mask); - } + } while (cpu != first); + return -1; } @@ -509,7 +595,7 @@ static int xive_pick_irq_target(struct irq_data *d, const struct cpumask *affinity) { static unsigned int fuzz; - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); cpumask_var_t mask; int cpu = -1; @@ -542,22 +628,14 @@ static int xive_pick_irq_target(struct irq_data *d, static unsigned int xive_irq_startup(struct irq_data *d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); int target, rc; - pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + xd->saved_p = false; + xd->stale_p = false; -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); /* Pick a target */ target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); @@ -592,13 +670,13 @@ static unsigned int xive_irq_startup(struct irq_data *d) return 0; } +/* called with irq descriptor lock held */ static void xive_irq_shutdown(struct irq_data *d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); if (WARN_ON(xd->target == XIVE_INVALID_TARGET)) return; @@ -607,16 +685,6 @@ static void xive_irq_shutdown(struct irq_data *d) xive_do_source_set_mask(xd, true); /* - * The above may have set saved_p. We clear it otherwise it - * will prevent re-enabling later on. It is ok to forget the - * fact that the interrupt might be in a queue because we are - * accounting that already in xive_dec_target_count() and will - * be re-routing it to a new queue with proper accounting when - * it's started up again - */ - xd->saved_p = false; - - /* * Mask the interrupt in HW in the IVT/EAS and set the number * to be the "bad" IRQ number */ @@ -630,46 +698,18 @@ static void xive_irq_shutdown(struct irq_data *d) static void xive_irq_unmask(struct irq_data *d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - - pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call FW to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - xive_irq_priority, d->irq); - return; - } + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, false); } static void xive_irq_mask(struct irq_data *d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - - pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call OPAL to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - 0xff, d->irq); - return; - } + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, true); } @@ -678,21 +718,17 @@ static int xive_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); u32 target, old_target; int rc = 0; - pr_devel("xive_irq_set_affinity: irq %d\n", d->irq); + pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? */ - if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) + if (!cpumask_intersects(cpumask, cpu_online_mask)) return -EINVAL; - /* Don't do anything if the interrupt isn't started */ - if (!irqd_is_started(d)) - return IRQ_SET_MASK_OK; - /* * If existing target is already in the new mask, and is * online then do nothing. @@ -728,7 +764,7 @@ static int xive_irq_set_affinity(struct irq_data *d, return rc; } - pr_devel(" target: 0x%x\n", target); + pr_debug(" target: 0x%x\n", target); xd->target = target; /* Give up previous target */ @@ -740,14 +776,14 @@ static int xive_irq_set_affinity(struct irq_data *d, static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); /* * We only support these. This has really no effect other than setting * the corresponding descriptor bits mind you but those will in turn * affect the resend function when re-enabling an edge interrupt. * - * Set set the default to edge as explained in map(). + * Set the default to edge as explained in map(). */ if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE) flow_type = IRQ_TYPE_EDGE_RISING; @@ -779,7 +815,7 @@ static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type) static int xive_irq_retrigger(struct irq_data *d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); /* This should be only for MSIs */ if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI)) @@ -790,33 +826,23 @@ static int xive_irq_retrigger(struct irq_data *d) * 11, then perform an EOI. */ xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - - /* - * Note: We pass "0" to the hw_irq argument in order to - * avoid calling into the backend EOI code which we don't - * want to do in the case of a re-trigger. Backends typically - * only do EOI for LSIs anyway. - */ - xive_do_source_eoi(0, xd); + xive_do_source_eoi(xd); return 1; } +/* + * Caller holds the irq descriptor lock, so this won't be called + * concurrently with xive_get_irqchip_state on the same interrupt. + */ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_data_get_irq_chip_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); int rc; u8 pq; /* - * We only support this on interrupts that do not require - * firmware calls for masking and unmasking - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) - return -EIO; - - /* * This is called by KVM with state non-NULL for enabling * pass-through or NULL for disabling it */ @@ -825,6 +851,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) /* Set it to PQ=10 state to prevent further sends */ pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10); + if (!xd->stale_p) { + xd->saved_p = !!(pq & XIVE_ESB_VAL_P); + xd->stale_p = !xd->saved_p; + } /* No target ? nothing to do */ if (xd->target == XIVE_INVALID_TARGET) { @@ -832,7 +862,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * An untargetted interrupt should have been * also masked at the source */ - WARN_ON(pq & 2); + WARN_ON(xd->saved_p); return 0; } @@ -844,7 +874,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * * This also tells us that it's in flight to a host queue * or has already been fetched but hasn't been EOIed yet - * by the host. This it's potentially using up a host + * by the host. Thus it's potentially using up a host * queue slot. This is important to know because as long * as this is the case, we must not hard-unmask it when * "returning" that interrupt to the host. @@ -852,9 +882,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * This saved_p is cleared by the host EOI, when we know * for sure the queue slot is no longer in use. */ - if (pq & 2) { - pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - xd->saved_p = true; + if (xd->saved_p) { + xive_esb_read(xd, XIVE_ESB_SET_PQ_11); /* * Sync the XIVE source HW to ensure the interrupt @@ -867,8 +896,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) */ if (xive_ops->sync_source) xive_ops->sync_source(hw_irq); - } else - xd->saved_p = false; + } } else { irqd_clr_forwarded_to_vcpu(d); @@ -913,12 +941,39 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * while masked, the generic code will re-mask it anyway. */ if (!xd->saved_p) - xive_do_source_eoi(hw_irq, xd); + xive_do_source_eoi(xd); } return 0; } +/* Called with irq descriptor lock held. */ +static int xive_get_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, bool *state) +{ + struct xive_irq_data *xd = irq_data_get_irq_chip_data(data); + u8 pq; + + switch (which) { + case IRQCHIP_STATE_ACTIVE: + pq = xive_esb_read(xd, XIVE_ESB_GET); + + /* + * The esb value being all 1's means we couldn't get + * the PQ state of the interrupt through mmio. It may + * happen, for example when querying a PHB interrupt + * while the PHB is in an error state. We consider the + * interrupt to be inactive in that case. + */ + *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && + (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) && + !irqd_irq_disabled(data))); + return 0; + default: + return -EINVAL; + } +} + static struct irq_chip xive_irq_chip = { .name = "XIVE-IRQ", .irq_startup = xive_irq_startup, @@ -930,6 +985,7 @@ static struct irq_chip xive_irq_chip = { .irq_set_type = xive_irq_set_type, .irq_retrigger = xive_irq_retrigger, .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity, + .irq_get_irqchip_state = xive_get_irqchip_state, }; bool is_xive_irq(struct irq_chip *chip) @@ -940,6 +996,8 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { + pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq); + if (xd->eoi_mmio) { iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) @@ -953,32 +1011,40 @@ void xive_cleanup_irq_data(struct xive_irq_data *xd) } EXPORT_SYMBOL_GPL(xive_cleanup_irq_data); -static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) +static struct xive_irq_data *xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) { struct xive_irq_data *xd; int rc; xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL); if (!xd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rc = xive_ops->populate_irq_data(hw, xd); if (rc) { kfree(xd); - return rc; + return ERR_PTR(rc); } xd->target = XIVE_INVALID_TARGET; - irq_set_handler_data(virq, xd); - return 0; + /* + * Turn OFF by default the interrupt being mapped. A side + * effect of this check is the mapping the ESB page of the + * interrupt in the Linux address space. This prevents page + * fault issues in the crash handler which masks all + * interrupts. + */ + xive_esb_read(xd, XIVE_ESB_SET_PQ_01); + + return xd; } static void xive_irq_free_data(unsigned int virq) { - struct xive_irq_data *xd = irq_get_handler_data(virq); + struct xive_irq_data *xd = irq_get_chip_data(virq); if (!xd) return; - irq_set_handler_data(virq, NULL); + irq_set_chip_data(virq, NULL); xive_cleanup_irq_data(xd); kfree(xd); } @@ -1017,7 +1083,7 @@ static void xive_ipi_eoi(struct irq_data *d) DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); + xive_do_source_eoi(&xc->ipi_data); xive_do_queue_eoi(xc); } @@ -1036,28 +1102,100 @@ static struct irq_chip xive_ipi_chip = { .irq_unmask = xive_ipi_do_nothing, }; -static void __init xive_request_ipi(void) +/* + * IPIs are marked per-cpu. We use separate HW interrupts under the + * hood but associated with the same "linux" interrupt + */ +struct xive_ipi_alloc_info { + irq_hw_number_t hwirq; +}; + +static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - unsigned int virq; + struct xive_ipi_alloc_info *info = arg; + int i; - /* - * Initialization failed, move on, we might manage to - * reach the point where we display our errors before - * the system falls appart - */ - if (!xive_irq_domain) - return; + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip, + domain->host_data, handle_percpu_irq, + NULL, NULL); + } + return 0; +} + +static const struct irq_domain_ops xive_ipi_irq_domain_ops = { + .alloc = xive_ipi_irq_domain_alloc, +}; + +static int __init xive_init_ipis(void) +{ + struct fwnode_handle *fwnode; + struct irq_domain *ipi_domain; + unsigned int node; + int ret = -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI"); + if (!fwnode) + goto out; + + ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids, + &xive_ipi_irq_domain_ops, NULL); + if (!ipi_domain) + goto out_free_fwnode; + + xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL); + if (!xive_ipis) + goto out_free_domain; - /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, 0); - xive_ipi_irq = virq; + for_each_node(node) { + struct xive_ipi_desc *xid = &xive_ipis[node]; + struct xive_ipi_alloc_info info = { node }; - WARN_ON(request_irq(virq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); + /* + * Map one IPI interrupt per node for all cpus of that node. + * Since the HW interrupt number doesn't have any meaning, + * simply use the node number. + */ + ret = irq_domain_alloc_irqs(ipi_domain, 1, node, &info); + if (ret < 0) + goto out_free_xive_ipis; + xid->irq = ret; + + snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); + } + + return ret; + +out_free_xive_ipis: + kfree(xive_ipis); +out_free_domain: + irq_domain_remove(ipi_domain); +out_free_fwnode: + irq_domain_free_fwnode(fwnode); +out: + return ret; +} + +static int xive_request_ipi(unsigned int cpu) +{ + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; + int ret; + + if (atomic_inc_return(&xid->started) > 1) + return 0; + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + return ret; } static int xive_setup_cpu_ipi(unsigned int cpu) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); struct xive_cpu *xc; int rc; @@ -1066,9 +1204,12 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; + /* Register the IPI */ + xive_request_ipi(cpu); + /* Grab an IPI from the backend, this will populate xc->hw_ipi */ if (xive_ops->get_ipi(cpu, xc)) return -EIO; @@ -1089,8 +1230,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) pr_err("Failed to map IPI CPU %d\n", cpu); return -EIO; } - pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu, - xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); + pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu, + xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); /* Unmask it */ xive_do_source_set_mask(&xc->ipi_data, false); @@ -1098,14 +1239,18 @@ static int xive_setup_cpu_ipi(unsigned int cpu) return 0; } -static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) +noinstr static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); + /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; + /* TODO: clear IPI mapping */ + /* Mask the IPI */ xive_do_source_set_mask(&xc->ipi_data, true); @@ -1128,7 +1273,7 @@ void __init xive_smp_probe(void) smp_ops->cause_ipi = xive_cause_ipi; /* Register the IPI */ - xive_request_ipi(); + xive_init_ipis(); /* Allocate and setup IPI for the boot CPU */ xive_setup_cpu_ipi(smp_processor_id()); @@ -1139,7 +1284,7 @@ void __init xive_smp_probe(void) static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { - int rc; + struct xive_irq_data *xd; /* * Mark interrupts as edge sensitive by default so that resend @@ -1147,39 +1292,19 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, */ irq_clear_status_flags(virq, IRQ_LEVEL); -#ifdef CONFIG_SMP - /* IPIs are special and come up with HW number 0 */ - if (hw == 0) { - /* - * IPIs are marked per-cpu. We use separate HW interrupts under - * the hood but associated with the same "linux" interrupt - */ - irq_set_chip_and_handler(virq, &xive_ipi_chip, - handle_percpu_irq); - return 0; - } -#endif - - rc = xive_irq_alloc_data(virq, hw); - if (rc) - return rc; + xd = xive_irq_alloc_data(virq, hw); + if (IS_ERR(xd)) + return PTR_ERR(xd); irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq); + irq_set_chip_data(virq, xd); return 0; } static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) { - struct irq_data *data = irq_get_irq_data(virq); - unsigned int hw_irq; - - /* XXX Assign BAD number */ - if (!data) - return; - hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq) - xive_irq_free_data(virq); + xive_irq_free_data(virq); } static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, @@ -1210,20 +1335,138 @@ static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node, return xive_ops->match(node); } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" }; + +static const struct { + u64 mask; + char *name; +} xive_irq_flags[] = { + { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, + { XIVE_IRQ_FLAG_LSI, "LSI" }, + { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, + { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, +}; + +static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + struct xive_irq_data *xd; + u64 val; + int i; + + /* No IRQ domain level information. To be done */ + if (!irqd) + return; + + if (!is_xive_irq(irq_data_get_irq_chip(irqd))) + return; + + seq_printf(m, "%*sXIVE:\n", ind, ""); + ind++; + + xd = irq_data_get_irq_chip_data(irqd); + if (!xd) { + seq_printf(m, "%*snot assigned\n", ind, ""); + return; + } + + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "%*sESB: %s\n", ind, "", esb_names[val & 0x3]); + seq_printf(m, "%*sPstate: %s %s\n", ind, "", xd->stale_p ? "stale" : "", + xd->saved_p ? "saved" : ""); + seq_printf(m, "%*sTarget: %d\n", ind, "", xd->target); + seq_printf(m, "%*sChip: %d\n", ind, "", xd->src_chip); + seq_printf(m, "%*sTrigger: 0x%016llx\n", ind, "", xd->trig_page); + seq_printf(m, "%*sEOI: 0x%016llx\n", ind, "", xd->eoi_page); + seq_printf(m, "%*sFlags: 0x%llx\n", ind, "", xd->flags); + for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) { + if (xd->flags & xive_irq_flags[i].mask) + seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name); + } +} +#endif + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +static int xive_irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode), + fwspec->param, fwspec->param_count, + hwirq, type); +} + +static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_fwspec *fwspec = arg; + struct xive_irq_data *xd; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int i, rc; + + rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type); + if (rc) + return rc; + + pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + /* TODO: call xive_irq_domain_map() */ + + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. Will fix that up below if needed. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); + + /* allocates and sets handler data */ + xd = xive_irq_alloc_data(virq + i, hwirq + i); + if (IS_ERR(xd)) + return PTR_ERR(xd); + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &xive_irq_chip, xd); + irq_set_handler(virq + i, handle_fasteoi_irq); + } + + return 0; +} + +static void xive_irq_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + int i; + + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) + xive_irq_free_data(virq + i); +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + .alloc = xive_irq_domain_alloc, + .free = xive_irq_domain_free, + .translate = xive_irq_domain_translate, +#endif .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, .xlate = xive_irq_domain_xlate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = xive_irq_domain_debug_show, +#endif }; -static void __init xive_init_host(void) +static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ, - &xive_irq_domain_ops, NULL); + xive_irq_domain = irq_domain_create_tree(of_fwnode_handle(np), &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; - irq_set_default_host(xive_irq_domain); + irq_set_default_domain(xive_irq_domain); } static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc) @@ -1249,16 +1492,14 @@ static int xive_prepare_cpu(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); if (!xc) { - struct device_node *np; - xc = kzalloc_node(sizeof(struct xive_cpu), GFP_KERNEL, cpu_to_node(cpu)); if (!xc) return -ENOMEM; - np = of_get_cpu_node(cpu, NULL); - if (np) - xc->chip_id = of_get_ibm_chip_id(np); - of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; + xc->chip_id = XIVE_INVALID_CHIP_ID; + if (xive_ops->prepare_cpu) + xive_ops->prepare_cpu(cpu, xc); per_cpu(xive_cpu, cpu) = xc; } @@ -1283,7 +1524,7 @@ static void xive_setup_cpu(void) #ifdef CONFIG_SMP void xive_smp_setup_cpu(void) { - pr_devel("SMP setup CPU %d\n", smp_processor_id()); + pr_debug("SMP setup CPU %d\n", smp_processor_id()); /* This will have already been done on the boot CPU */ if (smp_processor_id() != boot_cpuid) @@ -1321,13 +1562,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) struct irq_desc *desc = irq_to_desc(irq); struct irq_data *d = irq_desc_get_irq_data(desc); struct xive_irq_data *xd; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); /* * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. */ - if (d->domain != xive_irq_domain || hw_irq == 0) + if (d->domain != xive_irq_domain) continue; /* @@ -1340,14 +1580,19 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) cpu, irq); #endif raw_spin_lock(&desc->lock); - xd = irq_desc_get_handler_data(desc); + xd = irq_desc_get_chip_data(desc); + + /* + * Clear saved_p to indicate that it's no longer pending + */ + xd->saved_p = false; /* * For LSIs, we EOI, this will cause a resend if it's * still asserted. Otherwise do an MSI retrigger. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + xive_do_source_eoi(xd); else xive_irq_retrigger(d); @@ -1388,7 +1633,7 @@ void xive_flush_interrupt(void) #endif /* CONFIG_SMP */ -void xive_teardown_cpu(void) +noinstr void xive_teardown_cpu(void) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); unsigned int cpu = smp_processor_id(); @@ -1414,8 +1659,8 @@ void xive_shutdown(void) xive_ops->shutdown(); } -bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio) +bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio) { xive_tima = area; xive_tima_offset = offset; @@ -1425,10 +1670,10 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o ppc_md.get_irq = xive_get_irq; __xive_enabled = true; - pr_devel("Initializing host..\n"); - xive_init_host(); + pr_debug("Initializing host..\n"); + xive_init_host(np); - pr_devel("Initializing boot CPU..\n"); + pr_debug("Initializing boot CPU..\n"); /* Allocate per-CPU data and queues */ xive_prepare_cpu(smp_processor_id()); @@ -1462,6 +1707,157 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); + +static int __init xive_store_eoi_cmdline(char *arg) +{ + if (!arg) + return 1; + + if (strncmp(arg, "off", 3) == 0) { + pr_info("StoreEOI disabled on kernel command line\n"); + xive_store_eoi = false; + } + return 1; +} +__setup("xive.store-eoi=", xive_store_eoi_cmdline); + +#ifdef CONFIG_DEBUG_FS +static void xive_debug_show_ipi(struct seq_file *m, int cpu) +{ + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + seq_printf(m, "CPU %d: ", cpu); + if (xc) { + seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); + +#ifdef CONFIG_SMP + { + char buffer[128]; + + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer); + } +#endif + } + seq_puts(m, "\n"); +} + +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int rc; + u32 target; + u8 prio; + u32 lirq; + char buffer[128]; + + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); + if (rc) { + seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); + return; + } + + seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", + hw_irq, target, prio, lirq); + + xive_irq_data_dump(irq_data_get_irq_chip_data(d), buffer, sizeof(buffer)); + seq_puts(m, buffer); + seq_puts(m, "\n"); +} + +static int xive_irq_debug_show(struct seq_file *m, void *private) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); + + if (d) + xive_debug_show_irq(m, d); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_irq_debug); + +static int xive_ipi_debug_show(struct seq_file *m, void *private) +{ + int cpu; + + if (xive_ops->debug_show) + xive_ops->debug_show(m, private); + + for_each_online_cpu(cpu) + xive_debug_show_ipi(m, cpu); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug); + +static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio) +{ + int i; + + seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle); + if (q->qpage) { + for (i = 0; i < q->msk + 1; i++) { + if (!(i % 8)) + seq_printf(m, "%05d ", i); + seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i), + (i + 1) % 8 ? " " : "\n"); + } + } + seq_puts(m, "\n"); +} + +static int xive_eq_debug_show(struct seq_file *m, void *private) +{ + int cpu = (long)m->private; + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + if (xc) + xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority], + xive_irq_priority); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_eq_debug); + +static void xive_core_debugfs_create(void) +{ + struct dentry *xive_dir; + struct dentry *xive_eq_dir; + long cpu; + char name[16]; + + xive_dir = debugfs_create_dir("xive", arch_debugfs_dir); + if (IS_ERR(xive_dir)) + return; + + debugfs_create_file("ipis", 0400, xive_dir, + NULL, &xive_ipi_debug_fops); + debugfs_create_file("interrupts", 0400, xive_dir, + NULL, &xive_irq_debug_fops); + xive_eq_dir = debugfs_create_dir("eqs", xive_dir); + for_each_possible_cpu(cpu) { + snprintf(name, sizeof(name), "cpu%ld", cpu); + debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu, + &xive_eq_debug_fops); + } + debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi); + + if (xive_ops->debug_create) + xive_ops->debug_create(xive_dir); +} +#else +static inline void xive_core_debugfs_create(void) { } +#endif /* CONFIG_DEBUG_FS */ + +int xive_core_debug_init(void) +{ + if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS)) + xive_core_debugfs_create(); + + return 0; +} diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 1ca127d052a6..a0934b516933 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2016,2017 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "xive: " fmt @@ -17,13 +13,15 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/delay.h> #include <linux/cpumask.h> #include <linux/mm.h> +#include <linux/kmemleak.h> -#include <asm/prom.h> +#include <asm/machdep.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/irq.h> @@ -43,6 +41,7 @@ static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; static bool xive_has_single_esc; +bool xive_has_save_restore; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -64,14 +63,10 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) opal_flags = be64_to_cpu(flags); if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; + if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2) + data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG) - data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG; - if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_MASK_FW; - if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_EOI_FW; data->eoi_page = be64_to_cpu(eoi_page); data->trig_page = be64_to_cpu(trig_page); data->esb_shift = be32_to_cpu(esb_shift); @@ -115,6 +110,22 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq) } EXPORT_SYMBOL_GPL(xive_native_configure_irq); +static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, + u32 *sw_irq) +{ + s64 rc; + __be64 vp; + __be32 lirq; + + rc = opal_xive_get_irq_config(hw_irq, &vp, prio, &lirq); + + *target = be64_to_cpu(vp); + *sw_irq = be32_to_cpu(lirq); + + return rc == 0 ? 0 : -ENXIO; +} + +#define vp_err(vp, fmt, ...) pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__) /* This can be called multiple time to change a queue configuration */ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, @@ -143,7 +154,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, &esc_irq_be, NULL); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc); rc = -EIO; goto fail; } @@ -166,7 +177,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, msleep(OPAL_BUSY_DELAY_MS); } if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc); rc = -EIO; } else { /* @@ -193,7 +204,7 @@ static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) msleep(OPAL_BUSY_DELAY_MS); } if (rc) - pr_err("Error %lld disabling queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc); } void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) @@ -235,6 +246,17 @@ static bool xive_native_match(struct device_node *node) return of_device_is_compatible(node, "ibm,opal-xive-vc"); } +static s64 opal_xive_allocate_irq(u32 chip_id) +{ + s64 irq = opal_xive_allocate_irq_raw(chip_id); + + /* + * Old versions of skiboot can incorrectly return 0xffffffff to + * indicate no space, fix it up here. + */ + return irq == 0xffffffff ? OPAL_RESOURCE : irq; +} + #ifdef CONFIG_SMP static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) { @@ -258,12 +280,12 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) } #endif /* CONFIG_SMP */ -u32 xive_native_alloc_irq(void) +u32 xive_native_alloc_irq_on_chip(u32 chip_id) { s64 rc; for (;;) { - rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP); + rc = opal_xive_allocate_irq(chip_id); if (rc != OPAL_BUSY) break; msleep(OPAL_BUSY_DELAY_MS); @@ -272,7 +294,7 @@ u32 xive_native_alloc_irq(void) return 0; return rc; } -EXPORT_SYMBOL_GPL(xive_native_alloc_irq); +EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip); void xive_native_free_irq(u32 irq) { @@ -291,7 +313,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -299,7 +321,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(OPAL_BUSY_DELAY_MS); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } @@ -361,13 +383,9 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } -static void xive_native_eoi(u32 hw_irq) +static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc) { - /* - * Not normally used except if specific interrupts need - * a workaround on EOI. - */ - opal_int_eoi(hw_irq); + xc->chip_id = cpu_to_chip_id(cpu); } static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) @@ -397,7 +415,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) return; } - /* Grab it's CAM value */ + /* Grab its CAM value */ rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL); if (rc) { pr_err("Failed to get pool VP info CPU %d\n", cpu); @@ -437,15 +455,30 @@ void xive_native_sync_source(u32 hw_irq) } EXPORT_SYMBOL_GPL(xive_native_sync_source); +void xive_native_sync_queue(u32 hw_irq) +{ + opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq); +} +EXPORT_SYMBOL_GPL(xive_native_sync_queue); + +#ifdef CONFIG_DEBUG_FS +static int xive_native_debug_create(struct dentry *xive_dir) +{ + debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore); + return 0; +} +#endif + static const struct xive_ops xive_native_ops = { .populate_irq_data = xive_native_populate_irq_data, .configure_irq = xive_native_configure_irq, + .get_irq_config = xive_native_get_irq_config, .setup_queue = xive_native_setup_queue, .cleanup_queue = xive_native_cleanup_queue, .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, - .eoi = xive_native_eoi, + .prepare_cpu = xive_native_prepare_cpu, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, @@ -453,10 +486,13 @@ static const struct xive_ops xive_native_ops = { .get_ipi = xive_native_get_ipi, .put_ipi = xive_native_put_ipi, #endif /* CONFIG_SMP */ +#ifdef CONFIG_DEBUG_FS + .debug_create = xive_native_debug_create, +#endif /* CONFIG_DEBUG_FS */ .name = "native", }; -static bool xive_parse_provisioning(struct device_node *np) +static bool __init xive_parse_provisioning(struct device_node *np) { int rc; @@ -496,16 +532,16 @@ static bool xive_parse_provisioning(struct device_node *np) return true; } -static void xive_native_setup_pools(void) +static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ - pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); + pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids); xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids); if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP)) - pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n"); + pr_err("Failed to allocate pool VP, KVM might not function\n"); - pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n", + pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n", xive_pool_vps, nr_cpu_ids); } @@ -515,14 +551,15 @@ u32 xive_native_default_eq_shift(void) } EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); +unsigned long xive_tima_os; +EXPORT_SYMBOL_GPL(xive_tima_os); + bool __init xive_native_init(void) { struct device_node *np; struct resource r; void __iomem *tima; - struct property *prop; u8 max_prio = 7; - const __be32 *p; u32 val, cpu; s64 rc; @@ -540,12 +577,12 @@ bool __init xive_native_init(void) /* Resource 1 is HV window */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } tima = ioremap(r.start, resource_size(&r)); if (!tima) { pr_err("Failed to map thread mgmnt area\n"); - return false; + goto err_put; } /* Read number of priorities */ @@ -553,41 +590,55 @@ bool __init xive_native_init(void) max_prio = val - 1; /* Iterate the EQ sizes and pick one */ - of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) { + of_property_for_each_u32(np, "ibm,xive-eq-sizes", val) { xive_queue_shift = val; if (val == PAGE_SHIFT) break; } /* Do we support single escalation */ - if (of_get_property(np, "single-escalation-support", NULL) != NULL) - xive_has_single_esc = true; + xive_has_single_esc = of_property_read_bool(np, "single-escalation-support"); + + xive_has_save_restore = of_property_read_bool(np, "vp-save-restore"); /* Configure Thread Management areas for KVM */ for_each_possible_cpu(cpu) kvmppc_set_xive_tima(cpu, r.start, tima); - /* Grab size of provisionning pages */ + /* Resource 2 is OS window */ + if (of_address_to_resource(np, 2, &r)) { + pr_err("Failed to get thread mgmnt area resource\n"); + goto err_put; + } + + xive_tima_os = r.start; + + /* Grab size of provisioning pages */ xive_parse_provisioning(np); /* Switch the XIVE to exploitation mode */ rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL); if (rc) { pr_err("Switch to exploitation mode failed with error %lld\n", rc); - return false; + goto err_put; } /* Setup some dummy HV pool VPs */ xive_native_setup_pools(); /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS, + if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS, max_prio)) { opal_xive_reset(OPAL_XIVE_MODE_EMU); - return false; + goto err_put; } + of_node_put(np); pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_put: + of_node_put(np); + return false; } static bool xive_native_provision_pages(void) @@ -607,6 +658,7 @@ static bool xive_native_provision_pages(void) pr_err("Failed to allocate provisioning page\n"); return false; } + kmemleak_ignore(p); opal_xive_donate_page(chip, __pa(p)); } return true; @@ -672,6 +724,8 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to enable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_enable_vp); @@ -686,6 +740,8 @@ int xive_native_disable_vp(u32 vp_id) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to disable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_disable_vp); @@ -697,8 +753,10 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) s64 rc; rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be); - if (rc) + if (rc) { + vp_err(vp_id, "Failed to get VP info : %lld\n", rc); return -EIO; + } *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu; *out_chip_id = be32_to_cpu(vp_chip_id_be); @@ -711,3 +769,107 @@ bool xive_native_has_single_escalation(void) return xive_has_single_esc; } EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); + +bool xive_native_has_save_restore(void) +{ + return xive_has_save_restore; +} +EXPORT_SYMBOL_GPL(xive_native_has_save_restore); + +int xive_native_get_queue_info(u32 vp_id, u32 prio, + u64 *out_qpage, + u64 *out_qsize, + u64 *out_qeoi_page, + u32 *out_escalate_irq, + u64 *out_qflags) +{ + __be64 qpage; + __be64 qsize; + __be64 qeoi_page; + __be32 escalate_irq; + __be64 qflags; + s64 rc; + + rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, + &qeoi_page, &escalate_irq, &qflags); + if (rc) { + vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc); + return -EIO; + } + + if (out_qpage) + *out_qpage = be64_to_cpu(qpage); + if (out_qsize) + *out_qsize = be64_to_cpu(qsize); + if (out_qeoi_page) + *out_qeoi_page = be64_to_cpu(qeoi_page); + if (out_escalate_irq) + *out_escalate_irq = be32_to_cpu(escalate_irq); + if (out_qflags) + *out_qflags = be64_to_cpu(qflags); + + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_get_queue_info); + +int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) +{ + __be32 opal_qtoggle; + __be32 opal_qindex; + s64 rc; + + rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, + &opal_qindex); + if (rc) { + vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc); + return -EIO; + } + + if (qtoggle) + *qtoggle = be32_to_cpu(opal_qtoggle); + if (qindex) + *qindex = be32_to_cpu(opal_qindex); + + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_get_queue_state); + +int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) +{ + s64 rc; + + rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); + if (rc) { + vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc); + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_set_queue_state); + +bool xive_native_has_queue_state_support(void) +{ + return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) && + opal_check_token(OPAL_XIVE_SET_QUEUE_STATE); +} +EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support); + +int xive_native_get_vp_state(u32 vp_id, u64 *out_state) +{ + __be64 state; + s64 rc; + + rc = opal_xive_get_vp_state(vp_id, &state); + if (rc) { + vp_err(vp_id, "failed to get vp state : %lld\n", rc); + return -EIO; + } + + if (out_state) + *out_state = be64_to_cpu(state); + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_get_vp_state); + +machine_arch_initcall(powernv, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 575db3b06a6b..5aedbe3e8e6a 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -1,26 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2016,2017 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define pr_fmt(fmt) "xive: " fmt #include <linux/types.h> #include <linux/irq.h> +#include <linux/seq_file.h> #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/bitmap.h> #include <linux/cpumask.h> #include <linux/mm.h> #include <linux/delay.h> +#include <linux/libfdt.h> +#include <asm/machdep.h> #include <asm/prom.h> #include <asm/io.h> #include <asm/smp.h> @@ -29,6 +31,8 @@ #include <asm/xive.h> #include <asm/xive-regs.h> #include <asm/hvcall.h> +#include <asm/svm.h> +#include <asm/ultravisor.h> #include "xive-internal.h" @@ -44,18 +48,22 @@ struct xive_irq_bitmap { static LIST_HEAD(xive_irq_bitmaps); -static int xive_irq_bitmap_add(int base, int count) +static int __init xive_irq_bitmap_add(int base, int count) { struct xive_irq_bitmap *xibm; - xibm = kzalloc(sizeof(*xibm), GFP_ATOMIC); + xibm = kzalloc(sizeof(*xibm), GFP_KERNEL); if (!xibm) return -ENOMEM; spin_lock_init(&xibm->lock); xibm->base = base; xibm->count = count; - xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL); + xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL); + if (!xibm->bitmap) { + kfree(xibm); + return -ENOMEM; + } list_add(&xibm->list, &xive_irq_bitmaps); pr_info("Using IRQ range [%x-%x]", xibm->base, @@ -63,6 +71,17 @@ static int xive_irq_bitmap_add(int base, int count) return 0; } +static void xive_irq_bitmap_remove_all(void) +{ + struct xive_irq_bitmap *xibm, *tmp; + + list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { + list_del(&xibm->list); + bitmap_free(xibm->bitmap); + kfree(xibm); + } +} + static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; @@ -169,7 +188,7 @@ static long plpar_int_get_source_info(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc); + pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -178,8 +197,8 @@ static long plpar_int_get_source_info(unsigned long flags, *trig_page = retbuf[2]; *esb_shift = retbuf[3]; - pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n", - retbuf[0], retbuf[1], retbuf[2], retbuf[3]); + pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n", + lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]); return 0; } @@ -196,8 +215,8 @@ static long plpar_int_set_source_config(unsigned long flags, long rc; - pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n", - flags, lisn, target, prio, sw_irq); + pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n", + flags, lisn, target, prio, sw_irq); do { @@ -206,7 +225,7 @@ static long plpar_int_set_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n", + pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n", lisn, target, prio, rc); return rc; } @@ -214,6 +233,38 @@ static long plpar_int_set_source_config(unsigned long flags, return 0; } +static long plpar_int_get_source_config(unsigned long flags, + unsigned long lisn, + unsigned long *target, + unsigned long *prio, + unsigned long *sw_irq) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn); + + do { + rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn, + target, prio, sw_irq); + } while (plpar_busy_delay(rc)); + + if (rc) { + pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n", + lisn, rc); + return rc; + } + + *target = retbuf[0]; + *prio = retbuf[1]; + *sw_irq = retbuf[2]; + + pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n", + retbuf[0], retbuf[1], retbuf[2]); + + return 0; +} + static long plpar_int_get_queue_info(unsigned long flags, unsigned long target, unsigned long priority, @@ -237,8 +288,8 @@ static long plpar_int_get_queue_info(unsigned long flags, *esn_page = retbuf[0]; *esn_size = retbuf[1]; - pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n", - retbuf[0], retbuf[1]); + pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n", + target, priority, retbuf[0], retbuf[1]); return 0; } @@ -253,8 +304,8 @@ static long plpar_int_set_queue_config(unsigned long flags, { long rc; - pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n", - flags, target, priority, qpage, qsize); + pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n", + flags, target, priority, qpage, qsize); do { rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target, @@ -262,7 +313,7 @@ static long plpar_int_set_queue_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n", + pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n", target, priority, qpage, rc); return rc; } @@ -279,7 +330,7 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn) } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc); + pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc); return rc; } @@ -297,8 +348,8 @@ static long plpar_int_esb(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n", - flags, lisn, offset, in_data); + pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n", + flags, lisn, offset, in_data); do { rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, @@ -306,7 +357,7 @@ static long plpar_int_esb(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n", + pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n", lisn, offset, rc); return rc; } @@ -359,20 +410,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->esb_shift = esb_shift; data->trig_page = trig_page; + data->hw_irq = hw_irq; + /* * No chip-id for the sPAPR backend. This has an impact how we * pick a target. See xive_pick_irq_target(). */ data->src_chip = XIVE_INVALID_CHIP_ID; + /* + * When the H_INT_ESB flag is set, the H_INT_ESB hcall should + * be used for interrupt management. Skip the remapping of the + * ESB pages which are not available. + */ + if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB) + return 0; + data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift); if (!data->eoi_mmio) { pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq); return -ENOMEM; } - data->hw_irq = hw_irq; - /* Full function page supports trigger */ if (flags & XIVE_SRC_TRIGGER) { data->trig_mmio = data->eoi_mmio; @@ -381,6 +440,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); if (!data->trig_mmio) { + iounmap(data->eoi_mmio); pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); return -ENOMEM; } @@ -397,6 +457,24 @@ static int xive_spapr_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq) return rc == 0 ? 0 : -ENXIO; } +static int xive_spapr_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, + u32 *sw_irq) +{ + long rc; + unsigned long h_target; + unsigned long h_prio; + unsigned long h_sw_irq; + + rc = plpar_int_get_source_config(0, hw_irq, &h_target, &h_prio, + &h_sw_irq); + + *target = h_target; + *prio = h_prio; + *sw_irq = h_sw_irq; + + return rc == 0 ? 0 : -ENXIO; +} + /* This can be called multiple time to change a queue configuration */ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, __be32 *qpage, u32 order) @@ -442,6 +520,9 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = -EIO; } else { q->qpage = qpage; + if (is_secure_guest()) + uv_share_page(PHYS_PFN(qpage_phys), + 1 << xive_alloc_order(order)); } fail: return rc; @@ -475,6 +556,8 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); + if (is_secure_guest()) + uv_unshare_page(PHYS_PFN(__pa(q->qpage)), 1 << alloc_order); free_pages((unsigned long)q->qpage, alloc_order); q->qpage = NULL; } @@ -482,7 +565,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, static bool xive_spapr_match(struct device_node *node) { /* Ignore cascaded controllers for the moment */ - return 1; + return true; } #ifdef CONFIG_SMP @@ -501,11 +584,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ @@ -561,11 +644,6 @@ static void xive_spapr_update_pending(struct xive_cpu *xc) } } -static void xive_spapr_eoi(u32 hw_irq) -{ - /* Not used */; -} - static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { /* Only some debug on the TIMA settings */ @@ -586,15 +664,33 @@ static void xive_spapr_sync_source(u32 hw_irq) plpar_int_sync(0, hw_irq); } +static int xive_spapr_debug_show(struct seq_file *m, void *private) +{ + struct xive_irq_bitmap *xibm; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (!buf) + return -ENOMEM; + + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { + memset(buf, 0, PAGE_SIZE); + bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); + seq_printf(m, "bitmap #%d: %s", xibm->count, buf); + } + kfree(buf); + + return 0; +} + static const struct xive_ops xive_spapr_ops = { .populate_irq_data = xive_spapr_populate_irq_data, .configure_irq = xive_spapr_configure_irq, + .get_irq_config = xive_spapr_get_irq_config, .setup_queue = xive_spapr_setup_queue, .cleanup_queue = xive_spapr_cleanup_queue, .match = xive_spapr_match, .shutdown = xive_spapr_shutdown, .update_pending = xive_spapr_update_pending, - .eoi = xive_spapr_eoi, .setup_cpu = xive_spapr_setup_cpu, .teardown_cpu = xive_spapr_teardown_cpu, .sync_source = xive_spapr_sync_source, @@ -602,6 +698,7 @@ static const struct xive_ops xive_spapr_ops = { #ifdef CONFIG_SMP .get_ipi = xive_spapr_get_ipi, .put_ipi = xive_spapr_put_ipi, + .debug_show = xive_spapr_debug_show, #endif /* CONFIG_SMP */ .name = "spapr", }; @@ -609,7 +706,7 @@ static const struct xive_ops xive_spapr_ops = { /* * get max priority from "/ibm,plat-res-int-priorities" */ -static bool xive_get_max_prio(u8 *max_prio) +static bool __init xive_get_max_prio(u8 *max_prio) { struct device_node *rootdn; const __be32 *reg; @@ -623,6 +720,7 @@ static bool xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + of_node_put(rootdn); if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; @@ -663,19 +761,67 @@ static bool xive_get_max_prio(u8 *max_prio) return true; } +static const u8 *__init get_vec5_feature(unsigned int index) +{ + unsigned long root, chosen; + int size; + const u8 *vec5; + + root = of_get_flat_dt_root(); + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); + if (chosen == -FDT_ERR_NOTFOUND) + return NULL; + + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) + return NULL; + + if (size <= index) + return NULL; + + return vec5 + index; +} + +static bool __init xive_spapr_disabled(void) +{ + const u8 *vec5_xive; + + vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT)); + if (vec5_xive) { + u8 val; + + val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT); + switch (val) { + case OV5_FEAT(OV5_XIVE_EITHER): + case OV5_FEAT(OV5_XIVE_LEGACY): + break; + case OV5_FEAT(OV5_XIVE_EXPLOIT): + /* Hypervisor only supports XIVE */ + if (xive_cmdline_disabled) + pr_warn("WARNING: Ignoring cmdline option xive=off\n"); + return false; + default: + pr_warn("%s: Unknown xive support option: 0x%x\n", + __func__, val); + break; + } + } + + return xive_cmdline_disabled; +} + bool __init xive_spapr_init(void) { struct device_node *np; struct resource r; void __iomem *tima; - struct property *prop; u8 max_prio; u32 val; u32 len; const __be32 *reg; - int i; + int i, err; - if (xive_cmdline_disabled) + if (xive_spapr_disabled()) return false; pr_devel("%s()\n", __func__); @@ -689,44 +835,58 @@ bool __init xive_spapr_init(void) /* Resource 1 is the OS ring TIMA */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } tima = ioremap(r.start, resource_size(&r)); if (!tima) { pr_err("Failed to map thread mgmnt area\n"); - return false; + goto err_put; } if (!xive_get_max_prio(&max_prio)) - return false; + goto err_unmap; /* Feed the IRQ number allocator with the ranges given in the DT */ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); if (!reg) { pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } if (len % (2 * sizeof(u32)) != 0) { pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } - for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) - xive_irq_bitmap_add(be32_to_cpu(reg[0]), - be32_to_cpu(reg[1])); + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) { + err = xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + if (err < 0) + goto err_mem_free; + } /* Iterate the EQ sizes and pick one */ - of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { + of_property_for_each_u32(np, "ibm,xive-eq-sizes", val) { xive_queue_shift = val; if (val == PAGE_SHIFT) break; } /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio)) - return false; + if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) + goto err_mem_free; + of_node_put(np); pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_mem_free: + xive_irq_bitmap_remove_all(); +err_unmap: + iounmap(tima); +err_put: + of_node_put(np); + return false; } + +machine_arch_initcall(pseries, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index f34abed0c05f..fe6d95d54af9 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -1,14 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 2016,2017 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef CONFIG_SMP @@ -37,27 +40,32 @@ struct xive_cpu { struct xive_ops { int (*populate_irq_data)(u32 hw_irq, struct xive_irq_data *data); int (*configure_irq)(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); + int (*get_irq_config)(u32 hw_irq, u32 *target, u8 *prio, + u32 *sw_irq); int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); + void (*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc); bool (*match)(struct device_node *np); void (*shutdown)(void); void (*update_pending)(struct xive_cpu *xc); - void (*eoi)(u32 hw_irq); void (*sync_source)(u32 hw_irq); u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write); #ifdef CONFIG_SMP int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc); void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); #endif + int (*debug_show)(struct seq_file *m, void *private); + int (*debug_create)(struct dentry *xive_dir); const char *name; }; -bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio); +bool xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); +int xive_core_debug_init(void); static inline u32 xive_alloc_order(u32 queue_shift) { @@ -65,5 +73,6 @@ static inline u32 xive_alloc_order(u32 queue_shift) } extern bool xive_cmdline_disabled; +extern bool xive_has_save_restore; #endif /* __XIVE_INTERNAL_H */ |
