diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/msi.c')
| -rw-r--r-- | arch/powerpc/platforms/pseries/msi.c | 325 |
1 files changed, 225 insertions, 100 deletions
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 637300330507..a82aaa786e9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -7,7 +7,10 @@ #include <linux/crash_dump.h> #include <linux/device.h> #include <linux/irq.h> +#include <linux/irqchip/irq-msi-lib.h> +#include <linux/irqdomain.h> #include <linux/msi.h> +#include <linux/seq_file.h> #include <asm/rtas.h> #include <asm/hw_irq.h> @@ -24,6 +27,7 @@ static int query_token, change_token; #define RTAS_CHANGE_MSI_FN 3 #define RTAS_CHANGE_MSIX_FN 4 #define RTAS_CHANGE_32MSI_FN 5 +#define RTAS_CHANGE_32MSIX_FN 6 /* RTAS Helpers */ @@ -39,7 +43,7 @@ static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) seq_num = 1; do { if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN || - func == RTAS_CHANGE_32MSI_FN) + func == RTAS_CHANGE_32MSI_FN || func == RTAS_CHANGE_32MSIX_FN) rc = rtas_call(change_token, 6, 4, rtas_ret, addr, BUID_HI(buid), BUID_LO(buid), func, num_irqs, seq_num); @@ -110,21 +114,6 @@ static int rtas_query_irq_number(struct pci_dn *pdn, int offset) return rtas_ret[0]; } -static void rtas_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct msi_desc *entry; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - } - - rtas_disable_msi(pdev); -} - static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) { struct device_node *dn; @@ -164,12 +153,12 @@ static int check_req_msix(struct pci_dev *pdev, int nvec) /* Quota calculation */ -static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +static struct device_node *__find_pe_total_msi(struct device_node *node, int *total) { struct device_node *dn; const __be32 *p; - dn = of_node_get(pci_device_to_OF_node(dev)); + dn = of_node_get(node); while (dn) { p = of_get_property(dn, "ibm,pe-total-#msi", NULL); if (p) { @@ -185,6 +174,11 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) return NULL; } +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + return __find_pe_total_msi(pci_device_to_OF_node(dev), total); +} + static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; @@ -330,27 +324,6 @@ out: return request; } -static int check_msix_entries(struct pci_dev *pdev) -{ - struct msi_desc *entry; - int expected; - - /* There's no way for us to express to firmware that we want - * a discontiguous, or non-zero based, range of MSI-X entries. - * So we must reject such requests. */ - - expected = 0; - for_each_pci_msi_entry(entry, pdev) { - if (entry->msi_attrib.entry_nr != expected) { - pr_debug("rtas_msi: bad MSI-X entries.\n"); - return -EINVAL; - } - expected++; - } - - return 0; -} - static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) { u32 addr_hi, addr_lo; @@ -368,12 +341,11 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) +static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type, + msi_alloc_info_t *arg) { struct pci_dn *pdn; - int hwirq, virq, i, quota, rc; - struct msi_desc *entry; - struct msi_msg msg; + int quota, rc; int nvec = nvec_in; int use_32bit_msi_hack = 0; @@ -390,9 +362,6 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) if (quota && quota < nvec) return quota; - if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev)) - return -EINVAL; - /* * Firmware currently refuse any non power of two allocation * so we round up if the quota will allow it. @@ -439,8 +408,12 @@ again: if (use_32bit_msi_hack && rc > 0) rtas_hack_32bit_msi_gen2(pdev); - } else - rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); + } else { + if (pdev->no_64bit_msi) + rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec); + else + rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); + } if (rc != nvec) { if (nvec != nvec_in) { @@ -451,51 +424,215 @@ again: return rc; } - i = 0; - for_each_pci_msi_entry(entry, pdev) { - hwirq = rtas_query_irq_number(pdn, i++); - if (hwirq < 0) { - pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); - return hwirq; - } + return 0; +} - /* - * Depending on the number of online CPUs in the original - * kernel, it is likely for CPU #0 to be offline in a kdump - * kernel. The associated IRQs in the affinity mappings - * provided by irq_create_affinity_masks() are thus not - * started by irq_startup(), as per-design for managed IRQs. - * This can be a problem with multi-queue block devices driven - * by blk-mq : such a non-started IRQ is very likely paired - * with the single queue enforced by blk-mq during kdump (see - * blk_mq_alloc_tag_set()). This causes the device to remain - * silent and likely hangs the guest at some point. - * - * We don't really care for fine-grained affinity when doing - * kdump actually : simply ignore the pre-computed affinity - * masks in this case and let the default mask with all CPUs - * be used when creating the IRQ mappings. - */ - if (is_kdump_kernel()) - virq = irq_create_mapping(NULL, hwirq); - else - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); +static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct msi_domain_info *info = domain->host_data; + struct pci_dev *pdev = to_pci_dev(dev); + int type = (info->flags & MSI_FLAG_PCI_MSIX) ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI; - if (!virq) { - pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); - return -ENOSPC; - } + return rtas_prepare_msi_irqs(pdev, nvec, type, arg); +} + +/* + * RTAS can not disable one MSI at a time. It's all or nothing. Do it + * at the end after all IRQs have been freed. + */ +static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(domain->dev); + + rtas_disable_msi(pdev); +} + +static void pseries_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_data_get_msi_desc(data); + + /* + * Do not update the MSIx vector table. It's not strictly necessary + * because the table is initialized by the underlying hypervisor, PowerVM + * or QEMU/KVM. However, if the MSIx vector entry is cleared, any further + * activation will fail. This can happen in some drivers (eg. IPR) which + * deactivate an IRQ used for testing MSI support. + */ + entry->msg = *msg; +} + +static bool pseries_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) + return false; - dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq); - irq_set_msi_desc(virq, entry); + chip->irq_shutdown = pseries_msi_shutdown; + chip->irq_write_msi_msg = pseries_msi_write_msg; - /* Read config space back so we can restore after reset */ - __pci_read_msi_msg(entry, &msg); - entry->msg = msg; + info->ops->msi_prepare = pseries_msi_ops_prepare; + info->ops->msi_teardown = pseries_msi_ops_teardown; + + return true; +} + +#define PSERIES_PCI_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ + MSI_FLAG_USE_DEF_CHIP_OPS | \ + MSI_FLAG_PCI_MSI_MASK_PARENT) +#define PSERIES_PCI_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | \ + MSI_FLAG_PCI_MSIX | \ + MSI_FLAG_MSIX_CONTIGUOUS | \ + MSI_FLAG_MULTI_PCI_MSI) + +static const struct msi_parent_ops pseries_msi_parent_ops = { + .required_flags = PSERIES_PCI_MSI_FLAGS_REQUIRED, + .supported_flags = PSERIES_PCI_MSI_FLAGS_SUPPORTED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, + .bus_select_token = DOMAIN_BUS_NEXUS, + .bus_select_mask = MATCH_PCI_MSI, + .prefix = "pSeries-", + .init_dev_msi_info = pseries_init_dev_msi_info, +}; + +static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); + + if (dev->current_state == PCI_D0) + __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); + else + get_cached_msi_msg(data->irq, msg); +} + +static struct irq_chip pseries_msi_irq_chip = { + .name = "pSeries-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pseries_msi_compose_msg, +}; + +static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *phb = domain->host_data; + msi_alloc_info_t *info = arg; + struct msi_desc *desc = info->desc; + struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + int hwirq; + int i, ret; + + hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index); + if (hwirq < 0) { + dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq); + return hwirq; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + phb->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pseries_msi_irq_chip, domain->host_data); } return 0; + +out: + /* TODO: handle RTAS cleanup in ->msi_finish() ? */ + irq_domain_free_irqs_parent(domain, virq, i); + return ret; +} + +static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *phb = irq_data_get_irq_chip_data(d); + + pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops pseries_irq_domain_ops = { + .select = msi_lib_irq_domain_select, + .alloc = pseries_irq_domain_alloc, + .free = pseries_irq_domain_free, +}; + +static int __pseries_msi_allocate_domains(struct pci_controller *phb, + unsigned int count) +{ + struct irq_domain *parent = irq_get_default_domain(); + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(phb->dn), + .ops = &pseries_irq_domain_ops, + .host_data = phb, + .size = count, + .parent = parent, + }; + + phb->dev_domain = msi_create_parent_irq_domain(&info, &pseries_msi_parent_ops); + if (!phb->dev_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + return -ENOMEM; + } + + return 0; +} + +int pseries_msi_allocate_domains(struct pci_controller *phb) +{ + int count; + + if (!__find_pe_total_msi(phb->dn, &count)) { + pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + return -ENOSPC; + } + + return __pseries_msi_allocate_domains(phb, count); +} + +void pseries_msi_free_domains(struct pci_controller *phb) +{ + if (phb->dev_domain) + irq_domain_remove(phb->dev_domain); } static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) @@ -518,10 +655,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { - struct pci_controller *phb; - - query_token = rtas_token("ibm,query-interrupt-source-number"); - change_token = rtas_token("ibm,change-msi"); + query_token = rtas_function_token(RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER); + change_token = rtas_function_token(RTAS_FN_IBM_CHANGE_MSI); if ((query_token == RTAS_UNKNOWN_SERVICE) || (change_token == RTAS_UNKNOWN_SERVICE)) { @@ -531,16 +666,6 @@ static int rtas_msi_init(void) pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); - WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); - pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; - pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; - - list_for_each_entry(phb, &hose_list, list_node) { - WARN_ON(phb->controller_ops.setup_msi_irqs); - phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; - phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; - } - WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; |
