diff options
Diffstat (limited to 'arch/powerpc/sysdev/xive')
-rw-r--r-- | arch/powerpc/sysdev/xive/common.c | 719 | ||||
-rw-r--r-- | arch/powerpc/sysdev/xive/native.c | 117 | ||||
-rw-r--r-- | arch/powerpc/sysdev/xive/spapr.c | 139 | ||||
-rw-r--r-- | arch/powerpc/sysdev/xive/xive-internal.h | 17 |
4 files changed, 698 insertions, 294 deletions
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f5fadbd2533a..a289cb97c1d7 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -9,6 +9,7 @@ #include <linux/threads.h> #include <linux/kernel.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/debugfs.h> #include <linux/smp.h> #include <linux/interrupt.h> @@ -19,8 +20,8 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/msi.h> +#include <linux/vmalloc.h> -#include <asm/prom.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/machdep.h> @@ -61,22 +62,37 @@ static const struct xive_ops *xive_ops; static struct irq_domain *xive_irq_domain; #ifdef CONFIG_SMP -/* The IPIs all use the same logical irq number */ -static u32 xive_ipi_irq; +/* The IPIs use the same logical irq number when on the same chip */ +static struct xive_ipi_desc { + unsigned int irq; + char name[16]; + atomic_t started; +} *xive_ipis; + +/* + * Use early_cpu_to_node() for hot-plugged CPUs + */ +static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu) +{ + return xive_ipis[early_cpu_to_node(cpu)].irq; +} #endif /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); +/* An invalid CPU target */ +#define XIVE_INVALID_TARGET (-1) + /* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this + * Global toggle to switch on/off StoreEOI */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) +static bool xive_store_eoi = true; -/* An invalid CPU target */ -#define XIVE_INVALID_TARGET (-1) +static bool xive_is_store_eoi(struct xive_irq_data *xd) +{ + return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi; +} /* * Read the next entry in a queue, return its content if it's valid @@ -202,9 +218,8 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) { u64 val; - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; + if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd)) + offset |= XIVE_ESB_LD_ST_MO; if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0); @@ -216,16 +231,27 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) { - /* Handle HW errata */ - if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) - offset |= offset << 4; - if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) xive_ops->esb_rw(xd->hw_irq, offset, data, 1); else out_be64(xd->eoi_mmio + offset, data); } +#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS) +static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size) +{ + u64 val = xive_esb_read(xd, XIVE_ESB_GET); + + snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx", + xive_is_store_eoi(xd) ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-', + xd->trig_page, xd->eoi_page); +} +#endif + #ifdef CONFIG_XMON static notrace void xive_dump_eq(const char *name, struct xive_q *q) { @@ -251,11 +277,10 @@ notrace void xmon_xive_do_dump(int cpu) #ifdef CONFIG_SMP { - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + char buffer[128]; - xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi, - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer); } #endif xive_dump_eq("EQ", &xc->queue[xive_irq_priority]); @@ -263,6 +288,13 @@ notrace void xmon_xive_do_dump(int cpu) xmon_printf("\n"); } +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? irq_get_irq_data(irq) : NULL; +} + int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { int rc; @@ -279,19 +311,34 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); + if (!d) + d = xive_get_irq_data(hw_irq); + if (d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 val = xive_esb_read(xd, XIVE_ESB_GET); + char buffer[128]; - xmon_printf("PQ=%c%c", - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(irq_data_get_irq_handler_data(d), + buffer, sizeof(buffer)); + xmon_printf("%s", buffer); } xmon_printf("\n"); return 0; } +void xmon_xive_get_irq_all(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); + + if (d) + xmon_xive_get_irq_config(irqd_to_hwirq(d), d); + } +} + #endif /* CONFIG_XMON */ static unsigned int xive_get_irq(void) @@ -351,50 +398,40 @@ static void xive_do_queue_eoi(struct xive_cpu *xc) * EOI an interrupt at the source. There are several methods * to do this depending on the HW version and source type */ -static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd) +static void xive_do_source_eoi(struct xive_irq_data *xd) { + u8 eoi_val; + xd->stale_p = false; + /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (xive_is_store_eoi(xd)) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); - else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) { - /* - * The FW told us to call it. This happens for some - * interrupt sources that need additional HW whacking - * beyond the ESB manipulation. For example LPC interrupts - * on P9 DD1.0 needed a latch to be clared in the LPC bridge - * itself. The Firmware will take care of it. - */ - if (WARN_ON_ONCE(!xive_ops->eoi)) - return; - xive_ops->eoi(hw_irq); - } else { - u8 eoi_val; + return; + } - /* - * Otherwise for EOI, we use the special MMIO that does - * a clear of both P and Q and returns the old Q, - * except for LSIs where we use the "EOI cycle" special - * load. - * - * This allows us to then do a re-trigger if Q was set - * rather than synthesizing an interrupt in software - * - * For LSIs the HW EOI cycle is used rather than PQ bits, - * as they are automatically re-triggred in HW when still - * pending. - */ - if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_esb_read(xd, XIVE_ESB_LOAD_EOI); - else { - eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); - DBG_VERBOSE("eoi_val=%x\n", eoi_val); - - /* Re-trigger if needed */ - if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) - out_be64(xd->trig_mmio, 0); - } + /* + * For LSIs, we use the "EOI cycle" special load rather than + * PQ bits, as they are automatically re-triggered in HW when + * still pending. + */ + if (xd->flags & XIVE_IRQ_FLAG_LSI) { + xive_esb_read(xd, XIVE_ESB_LOAD_EOI); + return; } + + /* + * Otherwise, we use the special MMIO that does a clear of + * both P and Q and returns the old Q. This allows us to then + * do a re-trigger if Q was set rather than synthesizing an + * interrupt in software + */ + eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00); + DBG_VERBOSE("eoi_val=%x\n", eoi_val); + + /* Re-trigger if needed */ + if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio) + out_be64(xd->trig_mmio, 0); } /* irq_chip eoi callback, called with irq descriptor lock held */ @@ -411,8 +448,8 @@ static void xive_irq_eoi(struct irq_data *d) * been passed-through to a KVM guest */ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && - !(xd->flags & XIVE_IRQ_NO_EOI)) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + !(xd->flags & XIVE_IRQ_FLAG_NO_EOI)) + xive_do_source_eoi(xd); else xd->stale_p = true; @@ -427,15 +464,15 @@ static void xive_irq_eoi(struct irq_data *d) } /* - * Helper used to mask and unmask an interrupt source. This - * is only called for normal interrupts that do not require - * masking/unmasking via firmware. + * Helper used to mask and unmask an interrupt source. */ static void xive_do_source_set_mask(struct xive_irq_data *xd, bool mask) { u64 val; + pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un"); + /* * If the interrupt had P set, it may be in a queue. * @@ -597,18 +634,8 @@ static unsigned int xive_irq_startup(struct irq_data *d) xd->saved_p = false; xd->stale_p = false; - pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); /* Pick a target */ target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); @@ -649,8 +676,7 @@ static void xive_irq_shutdown(struct irq_data *d) struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); if (WARN_ON(xd->target == XIVE_INVALID_TARGET)) return; @@ -674,21 +700,7 @@ static void xive_irq_unmask(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); - - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call FW to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - xive_irq_priority, d->irq); - return; - } + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, false); } @@ -697,21 +709,7 @@ static void xive_irq_mask(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); - - /* - * This is a workaround for PCI LSI problems on P9, for - * these, we call OPAL to set the mask. The problems might - * be fixed by P9 DD2.0, if that is the case, firmware - * will no longer set that flag. - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - xive_ops->configure_irq(hw_irq, - get_hard_smp_processor_id(xd->target), - 0xff, d->irq); - return; - } + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, true); } @@ -725,16 +723,12 @@ static int xive_irq_set_affinity(struct irq_data *d, u32 target, old_target; int rc = 0; - pr_devel("xive_irq_set_affinity: irq %d\n", d->irq); + pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? */ if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) return -EINVAL; - /* Don't do anything if the interrupt isn't started */ - if (!irqd_is_started(d)) - return IRQ_SET_MASK_OK; - /* * If existing target is already in the new mask, and is * online then do nothing. @@ -770,7 +764,7 @@ static int xive_irq_set_affinity(struct irq_data *d, return rc; } - pr_devel(" target: 0x%x\n", target); + pr_debug(" target: 0x%x\n", target); xd->target = target; /* Give up previous target */ @@ -789,7 +783,7 @@ static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type) * the corresponding descriptor bits mind you but those will in turn * affect the resend function when re-enabling an edge interrupt. * - * Set set the default to edge as explained in map(). + * Set the default to edge as explained in map(). */ if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE) flow_type = IRQ_TYPE_EDGE_RISING; @@ -832,14 +826,7 @@ static int xive_irq_retrigger(struct irq_data *d) * 11, then perform an EOI. */ xive_esb_read(xd, XIVE_ESB_SET_PQ_11); - - /* - * Note: We pass "0" to the hw_irq argument in order to - * avoid calling into the backend EOI code which we don't - * want to do in the case of a re-trigger. Backends typically - * only do EOI for LSIs anyway. - */ - xive_do_source_eoi(0, xd); + xive_do_source_eoi(xd); return 1; } @@ -856,13 +843,6 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) u8 pq; /* - * We only support this on interrupts that do not require - * firmware calls for masking and unmasking - */ - if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) - return -EIO; - - /* * This is called by KVM with state non-NULL for enabling * pass-through or NULL for disabling it */ @@ -961,7 +941,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state) * while masked, the generic code will re-mask it anyway. */ if (!xd->saved_p) - xive_do_source_eoi(hw_irq, xd); + xive_do_source_eoi(xd); } return 0; @@ -972,12 +952,22 @@ static int xive_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); + u8 pq; switch (which) { case IRQCHIP_STATE_ACTIVE: - *state = !xd->stale_p && - (xd->saved_p || - !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); + pq = xive_esb_read(xd, XIVE_ESB_GET); + + /* + * The esb value being all 1's means we couldn't get + * the PQ state of the interrupt through mmio. It may + * happen, for example when querying a PHB interrupt + * while the PHB is in an error state. We consider the + * interrupt to be inactive in that case. + */ + *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && + (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) && + !irqd_irq_disabled(data))); return 0; default: return -EINVAL; @@ -1006,6 +996,8 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { + pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq); + if (xd->eoi_mmio) { iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) @@ -1047,7 +1039,7 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) return 0; } -static void xive_irq_free_data(unsigned int virq) +void xive_irq_free_data(unsigned int virq) { struct xive_irq_data *xd = irq_get_handler_data(virq); @@ -1057,6 +1049,7 @@ static void xive_irq_free_data(unsigned int virq) xive_cleanup_irq_data(xd); kfree(xd); } +EXPORT_SYMBOL_GPL(xive_irq_free_data); #ifdef CONFIG_SMP @@ -1092,7 +1085,7 @@ static void xive_ipi_eoi(struct irq_data *d) DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); + xive_do_source_eoi(&xc->ipi_data); xive_do_queue_eoi(xc); } @@ -1111,28 +1104,100 @@ static struct irq_chip xive_ipi_chip = { .irq_unmask = xive_ipi_do_nothing, }; -static void __init xive_request_ipi(void) +/* + * IPIs are marked per-cpu. We use separate HW interrupts under the + * hood but associated with the same "linux" interrupt + */ +struct xive_ipi_alloc_info { + irq_hw_number_t hwirq; +}; + +static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - unsigned int virq; + struct xive_ipi_alloc_info *info = arg; + int i; - /* - * Initialization failed, move on, we might manage to - * reach the point where we display our errors before - * the system falls appart - */ - if (!xive_irq_domain) - return; + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip, + domain->host_data, handle_percpu_irq, + NULL, NULL); + } + return 0; +} + +static const struct irq_domain_ops xive_ipi_irq_domain_ops = { + .alloc = xive_ipi_irq_domain_alloc, +}; + +static int __init xive_init_ipis(void) +{ + struct fwnode_handle *fwnode; + struct irq_domain *ipi_domain; + unsigned int node; + int ret = -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI"); + if (!fwnode) + goto out; - /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, 0); - xive_ipi_irq = virq; + ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids, + &xive_ipi_irq_domain_ops, NULL); + if (!ipi_domain) + goto out_free_fwnode; - WARN_ON(request_irq(virq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); + xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL); + if (!xive_ipis) + goto out_free_domain; + + for_each_node(node) { + struct xive_ipi_desc *xid = &xive_ipis[node]; + struct xive_ipi_alloc_info info = { node }; + + /* + * Map one IPI interrupt per node for all cpus of that node. + * Since the HW interrupt number doesn't have any meaning, + * simply use the node number. + */ + ret = irq_domain_alloc_irqs(ipi_domain, 1, node, &info); + if (ret < 0) + goto out_free_xive_ipis; + xid->irq = ret; + + snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); + } + + return ret; + +out_free_xive_ipis: + kfree(xive_ipis); +out_free_domain: + irq_domain_remove(ipi_domain); +out_free_fwnode: + irq_domain_free_fwnode(fwnode); +out: + return ret; +} + +static int xive_request_ipi(unsigned int cpu) +{ + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; + int ret; + + if (atomic_inc_return(&xid->started) > 1) + return 0; + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + return ret; } static int xive_setup_cpu_ipi(unsigned int cpu) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); struct xive_cpu *xc; int rc; @@ -1141,9 +1206,12 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; + /* Register the IPI */ + xive_request_ipi(cpu); + /* Grab an IPI from the backend, this will populate xc->hw_ipi */ if (xive_ops->get_ipi(cpu, xc)) return -EIO; @@ -1164,8 +1232,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) pr_err("Failed to map IPI CPU %d\n", cpu); return -EIO; } - pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu, - xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); + pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu, + xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); /* Unmask it */ xive_do_source_set_mask(&xc->ipi_data, false); @@ -1173,14 +1241,18 @@ static int xive_setup_cpu_ipi(unsigned int cpu) return 0; } -static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) +noinstr static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); + /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; + /* TODO: clear IPI mapping */ + /* Mask the IPI */ xive_do_source_set_mask(&xc->ipi_data, true); @@ -1203,7 +1275,7 @@ void __init xive_smp_probe(void) smp_ops->cause_ipi = xive_cause_ipi; /* Register the IPI */ - xive_request_ipi(); + xive_init_ipis(); /* Allocate and setup IPI for the boot CPU */ xive_setup_cpu_ipi(smp_processor_id()); @@ -1222,19 +1294,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, */ irq_clear_status_flags(virq, IRQ_LEVEL); -#ifdef CONFIG_SMP - /* IPIs are special and come up with HW number 0 */ - if (hw == 0) { - /* - * IPIs are marked per-cpu. We use separate HW interrupts under - * the hood but associated with the same "linux" interrupt - */ - irq_set_chip_and_handler(virq, &xive_ipi_chip, - handle_percpu_irq); - return 0; - } -#endif - rc = xive_irq_alloc_data(virq, hw); if (rc) return rc; @@ -1246,15 +1305,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) { - struct irq_data *data = irq_get_irq_data(virq); - unsigned int hw_irq; - - /* XXX Assign BAD number */ - if (!data) - return; - hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq) - xive_irq_free_data(virq); + xive_irq_free_data(virq); } static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, @@ -1285,17 +1336,135 @@ static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node, return xive_ops->match(node); } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" }; + +static const struct { + u64 mask; + char *name; +} xive_irq_flags[] = { + { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" }, + { XIVE_IRQ_FLAG_LSI, "LSI" }, + { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" }, + { XIVE_IRQ_FLAG_NO_EOI, "NO_EOI" }, +}; + +static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + struct xive_irq_data *xd; + u64 val; + int i; + + /* No IRQ domain level information. To be done */ + if (!irqd) + return; + + if (!is_xive_irq(irq_data_get_irq_chip(irqd))) + return; + + seq_printf(m, "%*sXIVE:\n", ind, ""); + ind++; + + xd = irq_data_get_irq_handler_data(irqd); + if (!xd) { + seq_printf(m, "%*snot assigned\n", ind, ""); + return; + } + + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "%*sESB: %s\n", ind, "", esb_names[val & 0x3]); + seq_printf(m, "%*sPstate: %s %s\n", ind, "", xd->stale_p ? "stale" : "", + xd->saved_p ? "saved" : ""); + seq_printf(m, "%*sTarget: %d\n", ind, "", xd->target); + seq_printf(m, "%*sChip: %d\n", ind, "", xd->src_chip); + seq_printf(m, "%*sTrigger: 0x%016llx\n", ind, "", xd->trig_page); + seq_printf(m, "%*sEOI: 0x%016llx\n", ind, "", xd->eoi_page); + seq_printf(m, "%*sFlags: 0x%llx\n", ind, "", xd->flags); + for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) { + if (xd->flags & xive_irq_flags[i].mask) + seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name); + } +} +#endif + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +static int xive_irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode), + fwspec->param, fwspec->param_count, + hwirq, type); +} + +static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int i, rc; + + rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type); + if (rc) + return rc; + + pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + /* TODO: call xive_irq_domain_map() */ + + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. Will fix that up below if needed. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); + + /* allocates and sets handler data */ + rc = xive_irq_alloc_data(virq + i, hwirq + i); + if (rc) + return rc; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &xive_irq_chip, domain->host_data); + irq_set_handler(virq + i, handle_fasteoi_irq); + } + + return 0; +} + +static void xive_irq_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + int i; + + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) + xive_irq_free_data(virq + i); +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + .alloc = xive_irq_domain_alloc, + .free = xive_irq_domain_free, + .translate = xive_irq_domain_translate, +#endif .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, .xlate = xive_irq_domain_xlate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = xive_irq_domain_debug_show, +#endif }; -static void __init xive_init_host(void) +static void __init xive_init_host(struct device_node *np) { - xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ, - &xive_irq_domain_ops, NULL); + xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL); if (WARN_ON(xive_irq_domain == NULL)) return; irq_set_default_host(xive_irq_domain); @@ -1324,16 +1493,14 @@ static int xive_prepare_cpu(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); if (!xc) { - struct device_node *np; - xc = kzalloc_node(sizeof(struct xive_cpu), GFP_KERNEL, cpu_to_node(cpu)); if (!xc) return -ENOMEM; - np = of_get_cpu_node(cpu, NULL); - if (np) - xc->chip_id = of_get_ibm_chip_id(np); - of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; + xc->chip_id = XIVE_INVALID_CHIP_ID; + if (xive_ops->prepare_cpu) + xive_ops->prepare_cpu(cpu, xc); per_cpu(xive_cpu, cpu) = xc; } @@ -1358,7 +1525,7 @@ static void xive_setup_cpu(void) #ifdef CONFIG_SMP void xive_smp_setup_cpu(void) { - pr_devel("SMP setup CPU %d\n", smp_processor_id()); + pr_debug("SMP setup CPU %d\n", smp_processor_id()); /* This will have already been done on the boot CPU */ if (smp_processor_id() != boot_cpuid) @@ -1396,13 +1563,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) struct irq_desc *desc = irq_to_desc(irq); struct irq_data *d = irq_desc_get_irq_data(desc); struct xive_irq_data *xd; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); /* * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. */ - if (d->domain != xive_irq_domain || hw_irq == 0) + if (d->domain != xive_irq_domain) continue; /* @@ -1427,7 +1593,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) * still asserted. Otherwise do an MSI retrigger. */ if (xd->flags & XIVE_IRQ_FLAG_LSI) - xive_do_source_eoi(irqd_to_hwirq(d), xd); + xive_do_source_eoi(xd); else xive_irq_retrigger(d); @@ -1468,7 +1634,7 @@ void xive_flush_interrupt(void) #endif /* CONFIG_SMP */ -void xive_teardown_cpu(void) +noinstr void xive_teardown_cpu(void) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); unsigned int cpu = smp_processor_id(); @@ -1494,8 +1660,8 @@ void xive_shutdown(void) xive_ops->shutdown(); } -bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio) +bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio) { xive_tima = area; xive_tima_offset = offset; @@ -1505,10 +1671,10 @@ bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 o ppc_md.get_irq = xive_get_irq; __xive_enabled = true; - pr_devel("Initializing host..\n"); - xive_init_host(); + pr_debug("Initializing host..\n"); + xive_init_host(np); - pr_devel("Initializing boot CPU..\n"); + pr_debug("Initializing boot CPU..\n"); /* Allocate per-CPU data and queues */ xive_prepare_cpu(smp_processor_id()); @@ -1542,6 +1708,157 @@ __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift) static int __init xive_off(char *arg) { xive_cmdline_disabled = true; - return 0; + return 1; } __setup("xive=off", xive_off); + +static int __init xive_store_eoi_cmdline(char *arg) +{ + if (!arg) + return 1; + + if (strncmp(arg, "off", 3) == 0) { + pr_info("StoreEOI disabled on kernel command line\n"); + xive_store_eoi = false; + } + return 1; +} +__setup("xive.store-eoi=", xive_store_eoi_cmdline); + +#ifdef CONFIG_DEBUG_FS +static void xive_debug_show_ipi(struct seq_file *m, int cpu) +{ + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + seq_printf(m, "CPU %d: ", cpu); + if (xc) { + seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); + +#ifdef CONFIG_SMP + { + char buffer[128]; + + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer); + } +#endif + } + seq_puts(m, "\n"); +} + +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int rc; + u32 target; + u8 prio; + u32 lirq; + char buffer[128]; + + rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); + if (rc) { + seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); + return; + } + + seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", + hw_irq, target, prio, lirq); + + xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer)); + seq_puts(m, buffer); + seq_puts(m, "\n"); +} + +static int xive_irq_debug_show(struct seq_file *m, void *private) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); + + if (d) + xive_debug_show_irq(m, d); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_irq_debug); + +static int xive_ipi_debug_show(struct seq_file *m, void *private) +{ + int cpu; + + if (xive_ops->debug_show) + xive_ops->debug_show(m, private); + + for_each_online_cpu(cpu) + xive_debug_show_ipi(m, cpu); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug); + +static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio) +{ + int i; + + seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle); + if (q->qpage) { + for (i = 0; i < q->msk + 1; i++) { + if (!(i % 8)) + seq_printf(m, "%05d ", i); + seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i), + (i + 1) % 8 ? " " : "\n"); + } + } + seq_puts(m, "\n"); +} + +static int xive_eq_debug_show(struct seq_file *m, void *private) +{ + int cpu = (long)m->private; + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + if (xc) + xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority], + xive_irq_priority); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_eq_debug); + +static void xive_core_debugfs_create(void) +{ + struct dentry *xive_dir; + struct dentry *xive_eq_dir; + long cpu; + char name[16]; + + xive_dir = debugfs_create_dir("xive", arch_debugfs_dir); + if (IS_ERR(xive_dir)) + return; + + debugfs_create_file("ipis", 0400, xive_dir, + NULL, &xive_ipi_debug_fops); + debugfs_create_file("interrupts", 0400, xive_dir, + NULL, &xive_irq_debug_fops); + xive_eq_dir = debugfs_create_dir("eqs", xive_dir); + for_each_possible_cpu(cpu) { + snprintf(name, sizeof(name), "cpu%ld", cpu); + debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu, + &xive_eq_debug_fops); + } + debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi); + + if (xive_ops->debug_create) + xive_ops->debug_create(xive_dir); +} +#else +static inline void xive_core_debugfs_create(void) { } +#endif /* CONFIG_DEBUG_FS */ + +int xive_core_debug_init(void) +{ + if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS)) + xive_core_debugfs_create(); + + return 0; +} diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0ff6b739052c..f1c0fa6ece21 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -13,13 +13,15 @@ #include <linux/seq_file.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/delay.h> #include <linux/cpumask.h> #include <linux/mm.h> +#include <linux/kmemleak.h> -#include <asm/prom.h> +#include <asm/machdep.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/irq.h> @@ -39,6 +41,7 @@ static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; static bool xive_has_single_esc; +bool xive_has_save_restore; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -60,14 +63,10 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) opal_flags = be64_to_cpu(flags); if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; + if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2) + data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; - if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG) - data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG; - if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_MASK_FW; - if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW) - data->flags |= XIVE_IRQ_FLAG_EOI_FW; data->eoi_page = be64_to_cpu(eoi_page); data->trig_page = be64_to_cpu(trig_page); data->esb_shift = be32_to_cpu(esb_shift); @@ -126,6 +125,8 @@ static int xive_native_get_irq_config(u32 hw_irq, u32 *target, u8 *prio, return rc == 0 ? 0 : -ENXIO; } +#define vp_err(vp, fmt, ...) pr_err("VP[0x%x]: " fmt, vp, ##__VA_ARGS__) + /* This can be called multiple time to change a queue configuration */ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, __be32 *qpage, u32 order, bool can_escalate) @@ -153,7 +154,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, &esc_irq_be, NULL); if (rc) { - pr_err("Error %lld getting queue info prio %d\n", rc, prio); + vp_err(vp_id, "Failed to get queue %d info : %lld\n", prio, rc); rc = -EIO; goto fail; } @@ -176,7 +177,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, msleep(OPAL_BUSY_DELAY_MS); } if (rc) { - pr_err("Error %lld setting queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to set queue %d info: %lld\n", prio, rc); rc = -EIO; } else { /* @@ -203,7 +204,7 @@ static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) msleep(OPAL_BUSY_DELAY_MS); } if (rc) - pr_err("Error %lld disabling queue for prio %d\n", rc, prio); + vp_err(vp_id, "Failed to disable queue %d : %lld\n", prio, rc); } void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) @@ -279,12 +280,12 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) } #endif /* CONFIG_SMP */ -u32 xive_native_alloc_irq(void) +u32 xive_native_alloc_irq_on_chip(u32 chip_id) { s64 rc; for (;;) { - rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP); + rc = opal_xive_allocate_irq(chip_id); if (rc != OPAL_BUSY) break; msleep(OPAL_BUSY_DELAY_MS); @@ -293,7 +294,7 @@ u32 xive_native_alloc_irq(void) return 0; return rc; } -EXPORT_SYMBOL_GPL(xive_native_alloc_irq); +EXPORT_SYMBOL_GPL(xive_native_alloc_irq_on_chip); void xive_native_free_irq(u32 irq) { @@ -312,7 +313,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -320,7 +321,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(OPAL_BUSY_DELAY_MS); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } @@ -382,13 +383,9 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } -static void xive_native_eoi(u32 hw_irq) +static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc) { - /* - * Not normally used except if specific interrupts need - * a workaround on EOI. - */ - opal_int_eoi(hw_irq); + xc->chip_id = cpu_to_chip_id(cpu); } static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) @@ -464,6 +461,14 @@ void xive_native_sync_queue(u32 hw_irq) } EXPORT_SYMBOL_GPL(xive_native_sync_queue); +#ifdef CONFIG_DEBUG_FS +static int xive_native_debug_create(struct dentry *xive_dir) +{ + debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore); + return 0; +} +#endif + static const struct xive_ops xive_native_ops = { .populate_irq_data = xive_native_populate_irq_data, .configure_irq = xive_native_configure_irq, @@ -473,7 +478,7 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, - .eoi = xive_native_eoi, + .prepare_cpu = xive_native_prepare_cpu, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, @@ -481,10 +486,13 @@ static const struct xive_ops xive_native_ops = { .get_ipi = xive_native_get_ipi, .put_ipi = xive_native_put_ipi, #endif /* CONFIG_SMP */ +#ifdef CONFIG_DEBUG_FS + .debug_create = xive_native_debug_create, +#endif /* CONFIG_DEBUG_FS */ .name = "native", }; -static bool xive_parse_provisioning(struct device_node *np) +static bool __init xive_parse_provisioning(struct device_node *np) { int rc; @@ -524,16 +532,16 @@ static bool xive_parse_provisioning(struct device_node *np) return true; } -static void xive_native_setup_pools(void) +static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ - pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); + pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids); xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids); if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP)) - pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n"); + pr_err("Failed to allocate pool VP, KVM might not function\n"); - pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n", + pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n", xive_pool_vps, nr_cpu_ids); } @@ -571,12 +579,12 @@ bool __init xive_native_init(void) /* Resource 1 is HV window */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } tima = ioremap(r.start, resource_size(&r)); if (!tima) { pr_err("Failed to map thread mgmnt area\n"); - return false; + goto err_put; } /* Read number of priorities */ @@ -591,8 +599,9 @@ bool __init xive_native_init(void) } /* Do we support single escalation */ - if (of_get_property(np, "single-escalation-support", NULL) != NULL) - xive_has_single_esc = true; + xive_has_single_esc = of_property_read_bool(np, "single-escalation-support"); + + xive_has_save_restore = of_property_read_bool(np, "vp-save-restore"); /* Configure Thread Management areas for KVM */ for_each_possible_cpu(cpu) @@ -601,32 +610,37 @@ bool __init xive_native_init(void) /* Resource 2 is OS window */ if (of_address_to_resource(np, 2, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } xive_tima_os = r.start; - /* Grab size of provisionning pages */ + /* Grab size of provisioning pages */ xive_parse_provisioning(np); /* Switch the XIVE to exploitation mode */ rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL); if (rc) { pr_err("Switch to exploitation mode failed with error %lld\n", rc); - return false; + goto err_put; } /* Setup some dummy HV pool VPs */ xive_native_setup_pools(); /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS, + if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS, max_prio)) { opal_xive_reset(OPAL_XIVE_MODE_EMU); - return false; + goto err_put; } + of_node_put(np); pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_put: + of_node_put(np); + return false; } static bool xive_native_provision_pages(void) @@ -646,6 +660,7 @@ static bool xive_native_provision_pages(void) pr_err("Failed to allocate provisioning page\n"); return false; } + kmemleak_ignore(p); opal_xive_donate_page(chip, __pa(p)); } return true; @@ -711,6 +726,8 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to enable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_enable_vp); @@ -725,6 +742,8 @@ int xive_native_disable_vp(u32 vp_id) break; msleep(OPAL_BUSY_DELAY_MS); } + if (rc) + vp_err(vp_id, "Failed to disable VP : %lld\n", rc); return rc ? -EIO : 0; } EXPORT_SYMBOL_GPL(xive_native_disable_vp); @@ -736,8 +755,10 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id) s64 rc; rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be); - if (rc) + if (rc) { + vp_err(vp_id, "Failed to get VP info : %lld\n", rc); return -EIO; + } *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu; *out_chip_id = be32_to_cpu(vp_chip_id_be); @@ -751,6 +772,12 @@ bool xive_native_has_single_escalation(void) } EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); +bool xive_native_has_save_restore(void) +{ + return xive_has_save_restore; +} +EXPORT_SYMBOL_GPL(xive_native_has_save_restore); + int xive_native_get_queue_info(u32 vp_id, u32 prio, u64 *out_qpage, u64 *out_qsize, @@ -768,15 +795,14 @@ int xive_native_get_queue_info(u32 vp_id, u32 prio, rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, &qeoi_page, &escalate_irq, &qflags); if (rc) { - pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d info : %lld\n", prio, rc); return -EIO; } if (out_qpage) *out_qpage = be64_to_cpu(qpage); if (out_qsize) - *out_qsize = be32_to_cpu(qsize); + *out_qsize = be64_to_cpu(qsize); if (out_qeoi_page) *out_qeoi_page = be64_to_cpu(qeoi_page); if (out_escalate_irq) @@ -797,8 +823,7 @@ int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, &opal_qindex); if (rc) { - pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to get queue %d state : %lld\n", prio, rc); return -EIO; } @@ -817,8 +842,7 @@ int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); if (rc) { - pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n", - vp_id, prio, rc); + vp_err(vp_id, "failed to set queue %d state : %lld\n", prio, rc); return -EIO; } @@ -840,8 +864,7 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) rc = opal_xive_get_vp_state(vp_id, &state); if (rc) { - pr_err("OPAL failed to get vp state for VCPU %d : %lld\n", - vp_id, rc); + vp_err(vp_id, "failed to get vp state : %lld\n", rc); return -EIO; } @@ -850,3 +873,5 @@ int xive_native_get_vp_state(u32 vp_id, u64 *out_state) return 0; } EXPORT_SYMBOL_GPL(xive_native_get_vp_state); + +machine_arch_initcall(powernv, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 55dc61cb4867..e45419264391 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -11,13 +11,17 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/bitmap.h> #include <linux/cpumask.h> #include <linux/mm.h> #include <linux/delay.h> #include <linux/libfdt.h> +#include <asm/machdep.h> #include <asm/prom.h> #include <asm/io.h> #include <asm/smp.h> @@ -26,6 +30,8 @@ #include <asm/xive.h> #include <asm/xive-regs.h> #include <asm/hvcall.h> +#include <asm/svm.h> +#include <asm/ultravisor.h> #include "xive-internal.h" @@ -41,7 +47,7 @@ struct xive_irq_bitmap { static LIST_HEAD(xive_irq_bitmaps); -static int xive_irq_bitmap_add(int base, int count) +static int __init xive_irq_bitmap_add(int base, int count) { struct xive_irq_bitmap *xibm; @@ -52,7 +58,7 @@ static int xive_irq_bitmap_add(int base, int count) spin_lock_init(&xibm->lock); xibm->base = base; xibm->count = count; - xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL); + xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL); if (!xibm->bitmap) { kfree(xibm); return -ENOMEM; @@ -64,6 +70,17 @@ static int xive_irq_bitmap_add(int base, int count) return 0; } +static void xive_irq_bitmap_remove_all(void) +{ + struct xive_irq_bitmap *xibm, *tmp; + + list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) { + list_del(&xibm->list); + bitmap_free(xibm->bitmap); + kfree(xibm); + } +} + static int __xive_irq_bitmap_alloc(struct xive_irq_bitmap *xibm) { int irq; @@ -170,7 +187,7 @@ static long plpar_int_get_source_info(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc); + pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -179,8 +196,8 @@ static long plpar_int_get_source_info(unsigned long flags, *trig_page = retbuf[2]; *esb_shift = retbuf[3]; - pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n", - retbuf[0], retbuf[1], retbuf[2], retbuf[3]); + pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n", + lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]); return 0; } @@ -197,8 +214,8 @@ static long plpar_int_set_source_config(unsigned long flags, long rc; - pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n", - flags, lisn, target, prio, sw_irq); + pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n", + flags, lisn, target, prio, sw_irq); do { @@ -207,7 +224,7 @@ static long plpar_int_set_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n", + pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n", lisn, target, prio, rc); return rc; } @@ -224,7 +241,7 @@ static long plpar_int_get_source_config(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn); + pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn); do { rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn, @@ -232,7 +249,7 @@ static long plpar_int_get_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n", + pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -241,8 +258,8 @@ static long plpar_int_get_source_config(unsigned long flags, *prio = retbuf[1]; *sw_irq = retbuf[2]; - pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n", - retbuf[0], retbuf[1], retbuf[2]); + pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n", + retbuf[0], retbuf[1], retbuf[2]); return 0; } @@ -270,8 +287,8 @@ static long plpar_int_get_queue_info(unsigned long flags, *esn_page = retbuf[0]; *esn_size = retbuf[1]; - pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n", - retbuf[0], retbuf[1]); + pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n", + target, priority, retbuf[0], retbuf[1]); return 0; } @@ -286,8 +303,8 @@ static long plpar_int_set_queue_config(unsigned long flags, { long rc; - pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n", - flags, target, priority, qpage, qsize); + pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n", + flags, target, priority, qpage, qsize); do { rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target, @@ -295,7 +312,7 @@ static long plpar_int_set_queue_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n", + pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n", target, priority, qpage, rc); return rc; } @@ -312,7 +329,7 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn) } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc); + pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc); return rc; } @@ -330,8 +347,8 @@ static long plpar_int_esb(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n", - flags, lisn, offset, in_data); + pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n", + flags, lisn, offset, in_data); do { rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, @@ -339,7 +356,7 @@ static long plpar_int_esb(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n", + pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n", lisn, offset, rc); return rc; } @@ -422,6 +439,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift); if (!data->trig_mmio) { + iounmap(data->eoi_mmio); pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq); return -ENOMEM; } @@ -501,6 +519,9 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio, rc = -EIO; } else { q->qpage = qpage; + if (is_secure_guest()) + uv_share_page(PHYS_PFN(qpage_phys), + 1 << xive_alloc_order(order)); } fail: return rc; @@ -534,6 +555,8 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, hw_cpu, prio); alloc_order = xive_alloc_order(xive_queue_shift); + if (is_secure_guest()) + uv_unshare_page(PHYS_PFN(__pa(q->qpage)), 1 << alloc_order); free_pages((unsigned long)q->qpage, alloc_order); q->qpage = NULL; } @@ -541,7 +564,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, static bool xive_spapr_match(struct device_node *node) { /* Ignore cascaded controllers for the moment */ - return 1; + return true; } #ifdef CONFIG_SMP @@ -560,11 +583,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ @@ -620,11 +643,6 @@ static void xive_spapr_update_pending(struct xive_cpu *xc) } } -static void xive_spapr_eoi(u32 hw_irq) -{ - /* Not used */; -} - static void xive_spapr_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { /* Only some debug on the TIMA settings */ @@ -645,6 +663,24 @@ static void xive_spapr_sync_source(u32 hw_irq) plpar_int_sync(0, hw_irq); } +static int xive_spapr_debug_show(struct seq_file *m, void *private) +{ + struct xive_irq_bitmap *xibm; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + + if (!buf) + return -ENOMEM; + + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { + memset(buf, 0, PAGE_SIZE); + bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); + seq_printf(m, "bitmap #%d: %s", xibm->count, buf); + } + kfree(buf); + + return 0; +} + static const struct xive_ops xive_spapr_ops = { .populate_irq_data = xive_spapr_populate_irq_data, .configure_irq = xive_spapr_configure_irq, @@ -654,7 +690,6 @@ static const struct xive_ops xive_spapr_ops = { .match = xive_spapr_match, .shutdown = xive_spapr_shutdown, .update_pending = xive_spapr_update_pending, - .eoi = xive_spapr_eoi, .setup_cpu = xive_spapr_setup_cpu, .teardown_cpu = xive_spapr_teardown_cpu, .sync_source = xive_spapr_sync_source, @@ -662,6 +697,7 @@ static const struct xive_ops xive_spapr_ops = { #ifdef CONFIG_SMP .get_ipi = xive_spapr_get_ipi, .put_ipi = xive_spapr_put_ipi, + .debug_show = xive_spapr_debug_show, #endif /* CONFIG_SMP */ .name = "spapr", }; @@ -669,7 +705,7 @@ static const struct xive_ops xive_spapr_ops = { /* * get max priority from "/ibm,plat-res-int-priorities" */ -static bool xive_get_max_prio(u8 *max_prio) +static bool __init xive_get_max_prio(u8 *max_prio) { struct device_node *rootdn; const __be32 *reg; @@ -683,6 +719,7 @@ static bool xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + of_node_put(rootdn); if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; @@ -723,7 +760,7 @@ static bool xive_get_max_prio(u8 *max_prio) return true; } -static const u8 *get_vec5_feature(unsigned int index) +static const u8 *__init get_vec5_feature(unsigned int index) { unsigned long root, chosen; int size; @@ -744,7 +781,7 @@ static const u8 *get_vec5_feature(unsigned int index) return vec5 + index; } -static bool xive_spapr_disabled(void) +static bool __init xive_spapr_disabled(void) { const u8 *vec5_xive; @@ -782,7 +819,7 @@ bool __init xive_spapr_init(void) u32 val; u32 len; const __be32 *reg; - int i; + int i, err; if (xive_spapr_disabled()) return false; @@ -798,32 +835,35 @@ bool __init xive_spapr_init(void) /* Resource 1 is the OS ring TIMA */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } tima = ioremap(r.start, resource_size(&r)); if (!tima) { pr_err("Failed to map thread mgmnt area\n"); - return false; + goto err_put; } if (!xive_get_max_prio(&max_prio)) - return false; + goto err_unmap; /* Feed the IRQ number allocator with the ranges given in the DT */ reg = of_get_property(np, "ibm,xive-lisn-ranges", &len); if (!reg) { pr_err("Failed to read 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } if (len % (2 * sizeof(u32)) != 0) { pr_err("invalid 'ibm,xive-lisn-ranges' property\n"); - return false; + goto err_unmap; } - for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) - xive_irq_bitmap_add(be32_to_cpu(reg[0]), - be32_to_cpu(reg[1])); + for (i = 0; i < len / (2 * sizeof(u32)); i++, reg += 2) { + err = xive_irq_bitmap_add(be32_to_cpu(reg[0]), + be32_to_cpu(reg[1])); + if (err < 0) + goto err_mem_free; + } /* Iterate the EQ sizes and pick one */ of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, reg, val) { @@ -833,9 +873,20 @@ bool __init xive_spapr_init(void) } /* Initialize XIVE core with our backend */ - if (!xive_core_init(&xive_spapr_ops, tima, TM_QW1_OS, max_prio)) - return false; + if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) + goto err_mem_free; + of_node_put(np); pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; + +err_mem_free: + xive_irq_bitmap_remove_all(); +err_unmap: + iounmap(tima); +err_put: + of_node_put(np); + return false; } + +machine_arch_initcall(pseries, xive_core_debug_init); diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 59cd366e7933..fe6d95d54af9 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef CONFIG_SMP @@ -37,25 +44,28 @@ struct xive_ops { u32 *sw_irq); int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); + void (*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc); bool (*match)(struct device_node *np); void (*shutdown)(void); void (*update_pending)(struct xive_cpu *xc); - void (*eoi)(u32 hw_irq); void (*sync_source)(u32 hw_irq); u64 (*esb_rw)(u32 hw_irq, u32 offset, u64 data, bool write); #ifdef CONFIG_SMP int (*get_ipi)(unsigned int cpu, struct xive_cpu *xc); void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); #endif + int (*debug_show)(struct seq_file *m, void *private); + int (*debug_create)(struct dentry *xive_dir); const char *name; }; -bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio); +bool xive_core_init(struct device_node *np, const struct xive_ops *ops, + void __iomem *area, u32 offset, u8 max_prio); __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift); +int xive_core_debug_init(void); static inline u32 xive_alloc_order(u32 queue_shift) { @@ -63,5 +73,6 @@ static inline u32 xive_alloc_order(u32 queue_shift) } extern bool xive_cmdline_disabled; +extern bool xive_has_save_restore; #endif /* __XIVE_INTERNAL_H */ |