summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv/pci-ioda.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2288
1 files changed, 583 insertions, 1705 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index da1068a9c263..23f5b5093ec1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -20,10 +20,12 @@
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <linux/sizes.h>
+#include <linux/debugfs.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <asm/sections.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
@@ -32,10 +34,10 @@
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
-#include <asm/debugfs.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
#include <asm/mmzone.h>
+#include <asm/xive.h>
#include <misc/cxl-base.h>
@@ -43,14 +45,11 @@
#include "pci.h"
#include "../../../../drivers/pci/pci.h"
-#define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */
-#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
-#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
-
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
- "NPU_OCAPI" };
+/* This array is indexed with enum pnv_phb_type */
+static const char * const pnv_phb_names[] = { "IODA2", "NPU_OCAPI" };
static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+static void pnv_pci_configure_bus(struct pci_bus *bus);
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
@@ -65,7 +64,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
vaf.va = &args;
if (pe->flags & PNV_IODA_PE_DEV)
- strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
+ strscpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
sprintf(pfix, "%04x:%02x ",
pci_domain_nr(pe->pbus), pe->pbus->number);
@@ -114,32 +113,13 @@ static int __init pci_reset_phbs_setup(char *str)
early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
-static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
-{
- /*
- * WARNING: We cannot rely on the resource flags. The Linux PCI
- * allocation code sometimes decides to put a 64-bit prefetchable
- * BAR in the 32-bit window, so we have to compare the addresses.
- *
- * For simplicity we only test resource start.
- */
- return (r->start >= phb->ioda.m64_base &&
- r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
-}
-
-static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
-{
- unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
-
- return (resource_flags & flags) == flags;
-}
-
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
{
s64 rc;
phb->ioda.pe_array[pe_no].phb = phb;
phb->ioda.pe_array[pe_no].pe_number = pe_no;
+ phb->ioda.pe_array[pe_no].dma_setup_done = false;
/*
* Clear the PE frozen state as it might be put into frozen state
@@ -163,35 +143,58 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
return;
}
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
pr_debug("%s: PE %x was reserved on PHB#%x\n",
__func__, pe_no, phb->hose->global_number);
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
pnv_ioda_init_pe(phb, pe_no);
}
-static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
{
- long pe;
+ struct pnv_ioda_pe *ret = NULL;
+ int run = 0, pe, i;
+
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
+ /* scan backwards for a run of @count cleared bits */
for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
- if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
- return pnv_ioda_init_pe(phb, pe);
+ if (test_bit(pe, phb->ioda.pe_alloc)) {
+ run = 0;
+ continue;
+ }
+
+ run++;
+ if (run == count)
+ break;
+ }
+ if (run != count)
+ goto out;
+
+ for (i = pe; i < pe + count; i++) {
+ set_bit(i, phb->ioda.pe_alloc);
+ pnv_ioda_init_pe(phb, i);
}
+ ret = &phb->ioda.pe_array[pe];
- return NULL;
+out:
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+ return ret;
}
-static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
{
struct pnv_phb *phb = pe->phb;
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
- WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
- kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
+
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
clear_bit(pe_num, phb->ioda.pe_alloc);
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
}
/* The default M64 BAR is shared by all PEs */
@@ -251,8 +254,7 @@ fail:
static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
unsigned long *pe_bitmap)
{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
struct resource *r;
resource_size_t base, sgsz, start, end;
int segno, i;
@@ -264,8 +266,8 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
if (!r->parent || !pnv_pci_is_m64(phb, r))
continue;
- start = _ALIGN_DOWN(r->start - base, sgsz);
- end = _ALIGN_UP(r->end - base, sgsz);
+ start = ALIGN_DOWN(r->start - base, sgsz);
+ end = ALIGN(r->end - base, sgsz);
for (segno = start / sgsz; segno < end / sgsz; segno++) {
if (pe_bitmap)
set_bit(segno, pe_bitmap);
@@ -275,64 +277,6 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
}
}
-static int pnv_ioda1_init_m64(struct pnv_phb *phb)
-{
- struct resource *r;
- int index;
-
- /*
- * There are 16 M64 BARs, each of which has 8 segments. So
- * there are as many M64 segments as the maximum number of
- * PEs, which is 128.
- */
- for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
- unsigned long base, segsz = phb->ioda.m64_segsize;
- int64_t rc;
-
- base = phb->ioda.m64_base +
- index * PNV_IODA1_M64_SEGS * segsz;
- rc = opal_pci_set_phb_mem_window(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, index, base, 0,
- PNV_IODA1_M64_SEGS * segsz);
- if (rc != OPAL_SUCCESS) {
- pr_warn(" Error %lld setting M64 PHB#%x-BAR#%d\n",
- rc, phb->hose->global_number, index);
- goto fail;
- }
-
- rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, index,
- OPAL_ENABLE_M64_SPLIT);
- if (rc != OPAL_SUCCESS) {
- pr_warn(" Error %lld enabling M64 PHB#%x-BAR#%d\n",
- rc, phb->hose->global_number, index);
- goto fail;
- }
- }
-
- /*
- * Exclude the segments for reserved and root bus PE, which
- * are first or last two PEs.
- */
- r = &phb->hose->mem_resources[1];
- if (phb->ioda.reserved_pe_idx == 0)
- r->start += (2 * phb->ioda.m64_segsize);
- else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
- r->end -= (2 * phb->ioda.m64_segsize);
- else
- WARN(1, "Wrong reserved PE#%x on PHB#%x\n",
- phb->ioda.reserved_pe_idx, phb->hose->global_number);
-
- return 0;
-
-fail:
- for ( ; index >= 0; index--)
- opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);
-
- return -EIO;
-}
-
static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
unsigned long *pe_bitmap,
bool all)
@@ -350,8 +294,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
struct pnv_ioda_pe *master_pe, *pe;
unsigned long size, *pe_alloc;
int i;
@@ -361,7 +304,7 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
return NULL;
/* Allocate bitmap */
- size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
+ size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
pe_alloc = kzalloc(size, GFP_KERNEL);
if (!pe_alloc) {
pr_warn("%s: Out of memory !\n",
@@ -402,26 +345,6 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
pe->master = master_pe;
list_add_tail(&pe->list, &master_pe->slaves);
}
-
- /*
- * P7IOC supports M64DT, which helps mapping M64 segment
- * to one particular PE#. However, PHB3 has fixed mapping
- * between M64 segment and PE#. In order to have same logic
- * for P7IOC and PHB3, we enforce fixed mapping between M64
- * segment and PE# on P7IOC.
- */
- if (phb->type == PNV_PHB_IODA1) {
- int64_t rc;
-
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe->pe_number, OPAL_M64_WINDOW_TYPE,
- pe->pe_number / PNV_IODA1_M64_SEGS,
- pe->pe_number % PNV_IODA1_M64_SEGS);
- if (rc != OPAL_SUCCESS)
- pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
- __func__, rc, phb->hose->global_number,
- pe->pe_number);
- }
}
kfree(pe_alloc);
@@ -437,7 +360,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
const __be32 *r;
u64 pci_addr;
- if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
+ if (phb->type != PNV_PHB_IODA2) {
pr_info(" Not support M64 window\n");
return;
}
@@ -512,10 +435,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
* Setup init functions for M64 based on IODA version, IODA3 uses
* the IODA2 code.
*/
- if (phb->type == PNV_PHB_IODA1)
- phb->init_m64 = pnv_ioda1_init_m64;
- else
- phb->init_m64 = pnv_ioda2_init_m64;
+ phb->init_m64 = pnv_ioda2_init_m64;
}
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
@@ -660,10 +580,19 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
return state;
}
+struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn)
+{
+ int pe_number = phb->ioda.pe_rmap[bdfn];
+
+ if (pe_number == IODA_INVALID_PE)
+ return NULL;
+
+ return &phb->ioda.pe_array[pe_number];
+}
+
struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
if (!pdn)
@@ -777,7 +706,35 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
return 0;
}
-static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe,
+ struct pci_dev *parent)
+{
+ int64_t rc;
+
+ while (parent) {
+ struct pci_dn *pdn = pci_get_pdn(parent);
+
+ if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+ rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+ pe->pe_number,
+ OPAL_REMOVE_PE_FROM_DOMAIN);
+ /* XXX What to do in case of error ? */
+ }
+ parent = parent->bus->self;
+ }
+
+ opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+ /* Disassociate PE in PELT */
+ rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+ pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+ if (rc)
+ pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
+}
+
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
struct pci_dev *parent;
uint8_t bcomp, dcomp, fcomp;
@@ -792,7 +749,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
parent = pe->pbus->self;
if (pe->flags & PNV_IODA_PE_BUS_ALL)
- count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ count = resource_size(&pe->pbus->busn_res);
else
count = 1;
@@ -827,25 +784,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
for (rid = pe->rid; rid < rid_end; rid++)
phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;
- /* Release from all parents PELT-V */
- while (parent) {
- struct pci_dn *pdn = pci_get_pdn(parent);
- if (pdn && pdn->pe_number != IODA_INVALID_PE) {
- rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
- pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
- /* XXX What to do in case of error ? */
- }
- parent = parent->bus->self;
- }
-
- opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
- OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ /*
+ * Release from all parents PELT-V. NPUs don't have a PELTV
+ * table
+ */
+ if (phb->type != PNV_PHB_NPU_OCAPI)
+ pnv_ioda_unset_peltv(phb, pe, parent);
- /* Disassociate PE in PELT */
- rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
- pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
- if (rc)
- pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
if (rc)
@@ -860,9 +805,8 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
return 0;
}
-static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
- struct pci_dev *parent;
uint8_t bcomp, dcomp, fcomp;
long rc, rid_end, rid;
@@ -872,9 +816,8 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
- parent = pe->pbus->self;
if (pe->flags & PNV_IODA_PE_BUS_ALL)
- count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ count = resource_size(&pe->pbus->busn_res);
else
count = 1;
@@ -893,12 +836,6 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
}
rid_end = pe->rid + (count << 8);
} else {
-#ifdef CONFIG_PCI_IOV
- if (pe->flags & PNV_IODA_PE_VF)
- parent = pe->parent_dev;
- else
-#endif /* CONFIG_PCI_IOV */
- parent = pe->pdev->bus->self;
bcomp = OpalPciBusAll;
dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
@@ -922,128 +859,21 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
* Configure PELTV. NPUs don't have a PELTV table so skip
* configuration on them.
*/
- if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+ if (phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
for (rid = pe->rid; rid < rid_end; rid++)
phb->ioda.pe_rmap[rid] = pe->pe_number;
- /* Setup one MVTs on IODA1 */
- if (phb->type != PNV_PHB_IODA1) {
- pe->mve_number = 0;
- goto out;
- }
+ pe->mve_number = 0;
- pe->mve_number = pe->pe_number;
- rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number);
- if (rc != OPAL_SUCCESS) {
- pe_err(pe, "OPAL error %ld setting up MVE %x\n",
- rc, pe->mve_number);
- pe->mve_number = -1;
- } else {
- rc = opal_pci_set_mve_enable(phb->opal_id,
- pe->mve_number, OPAL_ENABLE_MVE);
- if (rc) {
- pe_err(pe, "OPAL error %ld enabling MVE %x\n",
- rc, pe->mve_number);
- pe->mve_number = -1;
- }
- }
-
-out:
return 0;
}
-#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
-{
- struct pci_dn *pdn = pci_get_pdn(dev);
- int i;
- struct resource *res, res2;
- resource_size_t size;
- u16 num_vfs;
-
- if (!dev->is_physfn)
- return -EINVAL;
-
- /*
- * "offset" is in VFs. The M64 windows are sized so that when they
- * are segmented, each segment is the same size as the IOV BAR.
- * Each segment is in a separate PE, and the high order bits of the
- * address are the PE number. Therefore, each VF's BAR is in a
- * separate PE, and changing the IOV BAR start address changes the
- * range of PEs the VFs are in.
- */
- num_vfs = pdn->num_vfs;
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- res = &dev->resource[i + PCI_IOV_RESOURCES];
- if (!res->flags || !res->parent)
- continue;
-
- /*
- * The actual IOV BAR range is determined by the start address
- * and the actual size for num_vfs VFs BAR. This check is to
- * make sure that after shifting, the range will not overlap
- * with another device.
- */
- size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
- res2.flags = res->flags;
- res2.start = res->start + (size * offset);
- res2.end = res2.start + (size * num_vfs) - 1;
-
- if (res2.end > res->end) {
- dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
- i, &res2, res, num_vfs, offset);
- return -EBUSY;
- }
- }
-
- /*
- * Since M64 BAR shares segments among all possible 256 PEs,
- * we have to shift the beginning of PF IOV BAR to make it start from
- * the segment which belongs to the PE number assigned to the first VF.
- * This creates a "hole" in the /proc/iomem which could be used for
- * allocating other resources so we reserve this area below and
- * release when IOV is released.
- */
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- res = &dev->resource[i + PCI_IOV_RESOURCES];
- if (!res->flags || !res->parent)
- continue;
-
- size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
- res2 = *res;
- res->start += size * offset;
-
- dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
- i, &res2, res, (offset > 0) ? "En" : "Dis",
- num_vfs, offset);
-
- if (offset < 0) {
- devm_release_resource(&dev->dev, &pdn->holes[i]);
- memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
- }
-
- pci_update_resource(dev, i + PCI_IOV_RESOURCES);
-
- if (offset > 0) {
- pdn->holes[i].start = res2.start;
- pdn->holes[i].end = res2.start + size * offset - 1;
- pdn->holes[i].flags = IORESOURCE_BUS;
- pdn->holes[i].name = "pnv_iov_reserved";
- devm_request_resource(&dev->dev, res->parent,
- &pdn->holes[i]);
- }
- }
- return 0;
-}
-#endif /* CONFIG_PCI_IOV */
-
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
struct pci_dn *pdn = pci_get_pdn(dev);
struct pnv_ioda_pe *pe;
@@ -1055,27 +885,26 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
if (pdn->pe_number != IODA_INVALID_PE)
return NULL;
- pe = pnv_ioda_alloc_pe(phb);
+ pe = pnv_ioda_alloc_pe(phb, 1);
if (!pe) {
pr_warn("%s: Not enough PE# available, disabling device\n",
pci_name(dev));
return NULL;
}
- /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
- * pointer in the PE data structure, both should be destroyed at the
- * same time. However, this needs to be looked at more closely again
- * once we actually start removing things (Hotplug, SR-IOV, ...)
+ /* NOTE: We don't get a reference for the pointer in the PE
+ * data structure, both the device and PE structures should be
+ * destroyed at the same time.
*
* At some point we want to remove the PDN completely anyways
*/
- pci_dev_get(dev);
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
pe->pbus = NULL;
pe->mve_number = -1;
pe->rid = dev->bus->number << 8 | pdn->devfn;
+ pe->device_count++;
pe_info(pe, "Associated device to PE\n");
@@ -1084,44 +913,16 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL;
- pci_dev_put(dev);
return NULL;
}
/* Put PE to the list */
+ mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
-
+ mutex_unlock(&phb->ioda.pe_list_mutex);
return pe;
}
-static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- struct pci_dn *pdn = pci_get_pdn(dev);
-
- if (pdn == NULL) {
- pr_warn("%s: No device node associated with device !\n",
- pci_name(dev));
- continue;
- }
-
- /*
- * In partial hotplug case, the PCI device might be still
- * associated with the PE and needn't attach it to the PE
- * again.
- */
- if (pdn->pe_number != IODA_INVALID_PE)
- continue;
-
- pe->device_count++;
- pdn->pe_number = pe->pe_number;
- if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_same_PE(dev->subordinate, pe);
- }
-}
-
/*
* There're 2 types of PCI bus sensitive PEs: One that is compromised of
* single PCI bus. Another one that contains the primary PCI bus and its
@@ -1130,8 +931,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
*/
static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
struct pnv_ioda_pe *pe = NULL;
unsigned int pe_num;
@@ -1140,15 +940,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
* We should reuse it instead of allocating a new one.
*/
pe_num = phb->ioda.pe_rmap[bus->number << 8];
- if (pe_num != IODA_INVALID_PE) {
+ if (WARN_ON(pe_num != IODA_INVALID_PE)) {
pe = &phb->ioda.pe_array[pe_num];
- pnv_ioda_setup_same_PE(bus, pe);
return NULL;
}
/* PE number for root bus should have been reserved */
- if (pci_is_root_bus(bus) &&
- phb->ioda.root_pe_idx != IODA_INVALID_PE)
+ if (pci_is_root_bus(bus))
pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
/* Check if PE is determined by M64 */
@@ -1157,7 +955,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
/* The PE number isn't pinned by M64 */
if (!pe)
- pe = pnv_ioda_alloc_pe(phb);
+ pe = pnv_ioda_alloc_pe(phb, 1);
if (!pe) {
pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
@@ -1186,575 +984,66 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
return NULL;
}
- /* Associate it with all child devices */
- pnv_ioda_setup_same_PE(bus, pe);
-
/* Put PE to the list */
list_add_tail(&pe->list, &phb->ioda.pe_list);
return pe;
}
-static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
-{
- int pe_num, found_pe = false, rc;
- long rid;
- struct pnv_ioda_pe *pe;
- struct pci_dev *gpu_pdev;
- struct pci_dn *npu_pdn;
- struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
- struct pnv_phb *phb = hose->private_data;
-
- /*
- * Due to a hardware errata PE#0 on the NPU is reserved for
- * error handling. This means we only have three PEs remaining
- * which need to be assigned to four links, implying some
- * links must share PEs.
- *
- * To achieve this we assign PEs such that NPUs linking the
- * same GPU get assigned the same PE.
- */
- gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
- for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
- pe = &phb->ioda.pe_array[pe_num];
- if (!pe->pdev)
- continue;
-
- if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
- /*
- * This device has the same peer GPU so should
- * be assigned the same PE as the existing
- * peer NPU.
- */
- dev_info(&npu_pdev->dev,
- "Associating to existing PE %x\n", pe_num);
- pci_dev_get(npu_pdev);
- npu_pdn = pci_get_pdn(npu_pdev);
- rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
- npu_pdn->pe_number = pe_num;
- phb->ioda.pe_rmap[rid] = pe->pe_number;
-
- /* Map the PE to this link */
- rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
- OpalPciBusAll,
- OPAL_COMPARE_RID_DEVICE_NUMBER,
- OPAL_COMPARE_RID_FUNCTION_NUMBER,
- OPAL_MAP_PE);
- WARN_ON(rc != OPAL_SUCCESS);
- found_pe = true;
- break;
- }
- }
-
- if (!found_pe)
- /*
- * Could not find an existing PE so allocate a new
- * one.
- */
- return pnv_ioda_setup_dev_PE(npu_pdev);
- else
- return pe;
-}
-
-static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
-{
- struct pci_dev *pdev;
-
- list_for_each_entry(pdev, &bus->devices, bus_list)
- pnv_ioda_setup_npu_PE(pdev);
-}
-
-static void pnv_pci_ioda_setup_PEs(void)
+static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pci_bus *bus;
- struct pci_dev *pdev;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type == PNV_PHB_NPU_NVLINK) {
- /* PE#0 is needed for error reporting */
- pnv_ioda_reserve_pe(phb, 0);
- pnv_ioda_setup_npu_PEs(hose->bus);
- if (phb->model == PNV_PHB_MODEL_NPU2)
- WARN_ON_ONCE(pnv_npu2_init(hose));
- }
- if (phb->type == PNV_PHB_NPU_OCAPI) {
- bus = hose->bus;
- list_for_each_entry(pdev, &bus->devices, bus_list)
- pnv_ioda_setup_dev_PE(pdev);
- }
- }
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type != PNV_PHB_IODA2)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_list, list)
- pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
- }
-}
-
-#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
-{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pci_dn *pdn;
- int i, j;
- int m64_bars;
-
- bus = pdev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- pdn = pci_get_pdn(pdev);
-
- if (pdn->m64_single_mode)
- m64_bars = num_vfs;
- else
- m64_bars = 1;
-
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
- for (j = 0; j < m64_bars; j++) {
- if (pdn->m64_map[j][i] == IODA_INVALID_M64)
- continue;
- opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
- clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
- pdn->m64_map[j][i] = IODA_INVALID_M64;
- }
-
- kfree(pdn->m64_map);
- return 0;
-}
-
-static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
-{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pci_dn *pdn;
- unsigned int win;
- struct resource *res;
- int i, j;
- int64_t rc;
- int total_vfs;
- resource_size_t size, start;
- int pe_num;
- int m64_bars;
-
- bus = pdev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- pdn = pci_get_pdn(pdev);
- total_vfs = pci_sriov_get_totalvfs(pdev);
-
- if (pdn->m64_single_mode)
- m64_bars = num_vfs;
- else
- m64_bars = 1;
-
- pdn->m64_map = kmalloc_array(m64_bars,
- sizeof(*pdn->m64_map),
- GFP_KERNEL);
- if (!pdn->m64_map)
- return -ENOMEM;
- /* Initialize the m64_map to IODA_INVALID_M64 */
- for (i = 0; i < m64_bars ; i++)
- for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
- pdn->m64_map[i][j] = IODA_INVALID_M64;
-
-
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- res = &pdev->resource[i + PCI_IOV_RESOURCES];
- if (!res->flags || !res->parent)
- continue;
-
- for (j = 0; j < m64_bars; j++) {
- do {
- win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
- phb->ioda.m64_bar_idx + 1, 0);
-
- if (win >= phb->ioda.m64_bar_idx + 1)
- goto m64_failed;
- } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
-
- pdn->m64_map[j][i] = win;
-
- if (pdn->m64_single_mode) {
- size = pci_iov_resource_size(pdev,
- PCI_IOV_RESOURCES + i);
- start = res->start + size * j;
- } else {
- size = resource_size(res);
- start = res->start;
- }
-
- /* Map the M64 here */
- if (pdn->m64_single_mode) {
- pe_num = pdn->pe_num_map[j];
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- pe_num, OPAL_M64_WINDOW_TYPE,
- pdn->m64_map[j][i], 0);
- }
-
- rc = opal_pci_set_phb_mem_window(phb->opal_id,
- OPAL_M64_WINDOW_TYPE,
- pdn->m64_map[j][i],
- start,
- 0, /* unused */
- size);
-
-
- if (rc != OPAL_SUCCESS) {
- dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
- win, rc);
- goto m64_failed;
- }
-
- if (pdn->m64_single_mode)
- rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
- else
- rc = opal_pci_phb_mmio_enable(phb->opal_id,
- OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
-
- if (rc != OPAL_SUCCESS) {
- dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
- win, rc);
- goto m64_failed;
- }
- }
- }
- return 0;
-
-m64_failed:
- pnv_pci_vf_release_m64(pdev, num_vfs);
- return -EBUSY;
-}
-
-static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
- int num);
-
-static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
-{
- struct iommu_table *tbl;
- int64_t rc;
-
- tbl = pe->table_group.tables[0];
- rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
- if (rc)
- pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
-
- pnv_pci_ioda2_set_bypass(pe, false);
- if (pe->table_group.group) {
- iommu_group_put(pe->table_group.group);
- BUG_ON(pe->table_group.group);
- }
- iommu_tce_table_put(tbl);
-}
-
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
-{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe, *pe_n;
- struct pci_dn *pdn;
-
- bus = pdev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- pdn = pci_get_pdn(pdev);
-
- if (!pdev->is_physfn)
- return;
-
- list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
- if (pe->parent_dev != pdev)
- continue;
-
- pnv_pci_ioda2_release_dma_pe(pdev, pe);
-
- /* Remove from list */
- mutex_lock(&phb->ioda.pe_list_mutex);
- list_del(&pe->list);
- mutex_unlock(&phb->ioda.pe_list_mutex);
-
- pnv_ioda_deconfigure_pe(phb, pe);
-
- pnv_ioda_free_pe(pe);
- }
-}
-
-void pnv_pci_sriov_disable(struct pci_dev *pdev)
-{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
- struct pci_dn *pdn;
- u16 num_vfs, i;
-
- bus = pdev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- pdn = pci_get_pdn(pdev);
- num_vfs = pdn->num_vfs;
-
- /* Release VF PEs */
- pnv_ioda_release_vf_PE(pdev);
-
- if (phb->type == PNV_PHB_IODA2) {
- if (!pdn->m64_single_mode)
- pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
-
- /* Release M64 windows */
- pnv_pci_vf_release_m64(pdev, num_vfs);
-
- /* Release PE numbers */
- if (pdn->m64_single_mode) {
- for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] == IODA_INVALID_PE)
- continue;
-
- pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
- pnv_ioda_free_pe(pe);
- }
- } else
- bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
- /* Releasing pe_num_map */
- kfree(pdn->pe_num_map);
- }
-}
-
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe);
-#ifdef CONFIG_IOMMU_API
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
- struct iommu_table_group *table_group, struct pci_bus *bus);
-
-#endif
-static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
-{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
- int pe_num;
- u16 vf_index;
- struct pci_dn *pdn;
-
- bus = pdev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- pdn = pci_get_pdn(pdev);
-
- if (!pdev->is_physfn)
- return;
-
- /* Reserve PE for each VF */
- for (vf_index = 0; vf_index < num_vfs; vf_index++) {
- if (pdn->m64_single_mode)
- pe_num = pdn->pe_num_map[vf_index];
- else
- pe_num = *pdn->pe_num_map + vf_index;
-
- pe = &phb->ioda.pe_array[pe_num];
- pe->pe_number = pe_num;
- pe->phb = phb;
- pe->flags = PNV_IODA_PE_VF;
- pe->pbus = NULL;
- pe->parent_dev = pdev;
- pe->mve_number = -1;
- pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
- pci_iov_virtfn_devfn(pdev, vf_index);
-
- pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
- hose->global_number, pdev->bus->number,
- PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
- PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
-
- if (pnv_ioda_configure_pe(phb, pe)) {
- /* XXX What do we do here ? */
- pnv_ioda_free_pe(pe);
- pe->pdev = NULL;
- continue;
- }
-
- /* Put PE to the list */
- mutex_lock(&phb->ioda.pe_list_mutex);
- list_add_tail(&pe->list, &phb->ioda.pe_list);
- mutex_unlock(&phb->ioda.pe_list_mutex);
-
- pnv_pci_ioda2_setup_dma_pe(phb, pe);
-#ifdef CONFIG_IOMMU_API
- iommu_register_group(&pe->table_group,
- pe->phb->hose->global_number, pe->pe_number);
- pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
-#endif
- }
-}
-
-int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
-{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
- struct pci_dn *pdn;
- int ret;
- u16 i;
-
- bus = pdev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- pdn = pci_get_pdn(pdev);
-
- if (phb->type == PNV_PHB_IODA2) {
- if (!pdn->vfs_expanded) {
- dev_info(&pdev->dev, "don't support this SRIOV device"
- " with non 64bit-prefetchable IOV BAR\n");
- return -ENOSPC;
- }
-
- /*
- * When M64 BARs functions in Single PE mode, the number of VFs
- * could be enabled must be less than the number of M64 BARs.
- */
- if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
- dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
- return -EBUSY;
- }
-
- /* Allocating pe_num_map */
- if (pdn->m64_single_mode)
- pdn->pe_num_map = kmalloc_array(num_vfs,
- sizeof(*pdn->pe_num_map),
- GFP_KERNEL);
- else
- pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
-
- if (!pdn->pe_num_map)
- return -ENOMEM;
-
- if (pdn->m64_single_mode)
- for (i = 0; i < num_vfs; i++)
- pdn->pe_num_map[i] = IODA_INVALID_PE;
-
- /* Calculate available PE for required VFs */
- if (pdn->m64_single_mode) {
- for (i = 0; i < num_vfs; i++) {
- pe = pnv_ioda_alloc_pe(phb);
- if (!pe) {
- ret = -EBUSY;
- goto m64_failed;
- }
+ /* Check if the BDFN for this device is associated with a PE yet */
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+ if (!pe) {
+ /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */
+ if (WARN_ON(pdev->is_virtfn))
+ return;
- pdn->pe_num_map[i] = pe->pe_number;
- }
- } else {
- mutex_lock(&phb->ioda.pe_alloc_mutex);
- *pdn->pe_num_map = bitmap_find_next_zero_area(
- phb->ioda.pe_alloc, phb->ioda.total_pe_num,
- 0, num_vfs, 0);
- if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
- mutex_unlock(&phb->ioda.pe_alloc_mutex);
- dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
- kfree(pdn->pe_num_map);
- return -EBUSY;
- }
- bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
- mutex_unlock(&phb->ioda.pe_alloc_mutex);
- }
- pdn->num_vfs = num_vfs;
+ pnv_pci_configure_bus(pdev->bus);
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+ pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
- /* Assign M64 window accordingly */
- ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
- if (ret) {
- dev_info(&pdev->dev, "Not enough M64 window resources\n");
- goto m64_failed;
- }
/*
- * When using one M64 BAR to map one IOV BAR, we need to shift
- * the IOV BAR according to the PE# allocated to the VFs.
- * Otherwise, the PE# for the VF will conflict with others.
+ * If we can't setup the IODA PE something has gone horribly
+ * wrong and we can't enable DMA for the device.
*/
- if (!pdn->m64_single_mode) {
- ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
- if (ret)
- goto m64_failed;
- }
+ if (WARN_ON(!pe))
+ return;
+ } else {
+ pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
}
- /* Setup VF PEs */
- pnv_ioda_setup_vf_PE(pdev, num_vfs);
-
- return 0;
-
-m64_failed:
- if (pdn->m64_single_mode) {
- for (i = 0; i < num_vfs; i++) {
- if (pdn->pe_num_map[i] == IODA_INVALID_PE)
- continue;
-
- pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
- pnv_ioda_free_pe(pe);
- }
- } else
- bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-
- /* Releasing pe_num_map */
- kfree(pdn->pe_num_map);
-
- return ret;
-}
-
-int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
-{
- pnv_pci_sriov_disable(pdev);
-
- /* Release PCI data */
- remove_dev_pci_data(pdev);
- return 0;
-}
-
-int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
-{
- /* Allocate PCI data */
- add_dev_pci_data(pdev);
-
- return pnv_pci_sriov_enable(pdev, num_vfs);
-}
-#endif /* CONFIG_PCI_IOV */
-
-static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
-{
- struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pnv_ioda_pe *pe;
-
/*
- * The function can be called while the PE#
- * hasn't been assigned. Do nothing for the
- * case.
+ * We assume that bridges *probably* don't need to do any DMA so we can
+ * skip allocating a TCE table, etc unless we get a non-bridge device.
*/
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
- return;
+ if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
+ switch (phb->type) {
+ case PNV_PHB_IODA2:
+ pnv_pci_ioda2_setup_dma_pe(phb, pe);
+ break;
+ default:
+ pr_warn("%s: No DMA for PHB#%x (type %d)\n",
+ __func__, phb->hose->global_number, phb->type);
+ }
+ }
+
+ if (pdn)
+ pdn->pe_number = pe->pe_number;
+ pe->device_count++;
- pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
- /*
- * Note: iommu_add_device() will fail here as
- * for physical PE: the device is already added by now;
- * for virtual PE: sysfs entries are not ready yet and
- * tce_iommu_bus_notifier will add the device to a group later.
- */
+
+ /* PEs with a DMA weight of zero won't have a group */
+ if (pe->table_group.group)
+ iommu_add_device(&pe->table_group, &pdev->dev);
}
/*
@@ -1829,8 +1118,7 @@ err:
static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
u64 dma_mask)
{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
@@ -1867,137 +1155,39 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
return false;
}
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb)
{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
- dev->dev.archdata.dma_offset = pe->tce_bypass_base;
-
- if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate);
- }
-}
-
-static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb,
- bool real_mode)
-{
- return real_mode ? (__be64 __iomem *)(phb->regs_phys + 0x210) :
- (phb->regs + 0x210);
-}
-
-static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
- unsigned long index, unsigned long npages, bool rm)
-{
- struct iommu_table_group_link *tgl = list_first_entry_or_null(
- &tbl->it_group_list, struct iommu_table_group_link,
- next);
- struct pnv_ioda_pe *pe = container_of(tgl->table_group,
- struct pnv_ioda_pe, table_group);
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
- unsigned long start, end, inc;
-
- start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
- end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
- npages - 1);
-
- /* p7ioc-style invalidation, 2 TCEs per write */
- start |= (1ull << 63);
- end |= (1ull << 63);
- inc = 16;
- end |= inc - 1; /* round up end to be different than start */
-
- mb(); /* Ensure above stores are visible */
- while (start <= end) {
- if (rm)
- __raw_rm_writeq_be(start, invalidate);
- else
- __raw_writeq_be(start, invalidate);
-
- start += inc;
- }
-
- /*
- * The iommu layer will do another mb() for us on build()
- * and we don't care on free()
- */
-}
-
-static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
- long npages, unsigned long uaddr,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
- attrs);
-
- if (!ret)
- pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
-
- return ret;
+ return phb->regs + 0x210;
}
#ifdef CONFIG_IOMMU_API
/* Common for IODA1 and IODA2 */
static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
- unsigned long *hpa, enum dma_data_direction *direction,
- bool realmode)
+ unsigned long *hpa, enum dma_data_direction *direction)
{
- return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
+ return pnv_tce_xchg(tbl, index, hpa, direction);
}
#endif
-static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
- long npages)
-{
- pnv_tce_free(tbl, index, npages);
-
- pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
-}
-
-static struct iommu_table_ops pnv_ioda1_iommu_ops = {
- .set = pnv_ioda1_tce_build,
-#ifdef CONFIG_IOMMU_API
- .xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
- .tce_kill = pnv_pci_p7ioc_tce_invalidate,
- .useraddrptr = pnv_tce_useraddrptr,
-#endif
- .clear = pnv_ioda1_tce_free,
- .get = pnv_tce_get,
-};
-
#define PHB3_TCE_KILL_INVAL_ALL PPC_BIT(0)
#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1)
#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2)
-static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
- const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;
-
- mb(); /* Ensure previous TCE table stores are visible */
- if (rm)
- __raw_rm_writeq_be(val, invalidate);
- else
- __raw_writeq_be(val, invalidate);
-}
-
static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
/* 01xb - invalidate TCEs that match the specified PE# */
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+ __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
mb(); /* Ensure above stores are visible */
__raw_writeq_be(val, invalidate);
}
-static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
+static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe,
unsigned shift, unsigned long index,
unsigned long npages)
{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
+ __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
unsigned long start, end, inc;
/* We'll invalidate DMA address in PE scope */
@@ -2012,10 +1202,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
mb();
while (start <= end) {
- if (rm)
- __raw_rm_writeq_be(start, invalidate);
- else
- __raw_writeq_be(start, invalidate);
+ __raw_writeq_be(start, invalidate);
start += inc;
}
}
@@ -2032,7 +1219,7 @@ static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
}
static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
- unsigned long index, unsigned long npages, bool rm)
+ unsigned long index, unsigned long npages)
{
struct iommu_table_group_link *tgl;
@@ -2042,22 +1229,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
struct pnv_phb *phb = pe->phb;
unsigned int shift = tbl->it_page_shift;
- /*
- * NVLink1 can use the TCE kill register directly as
- * it's the same as PHB3. NVLink2 is different and
- * should go via the OPAL call.
- */
- if (phb->model == PNV_PHB_MODEL_NPU) {
- /*
- * The NVLink hardware does not support TCE kill
- * per TCE entry so we have to invalidate
- * the entire cache for it.
- */
- pnv_pci_phb3_tce_invalidate_entire(phb, rm);
- continue;
- }
if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
- pnv_pci_phb3_tce_invalidate(pe, rm, shift,
+ pnv_pci_phb3_tce_invalidate(pe, shift,
index, npages);
else
opal_pci_tce_kill(phb->opal_id,
@@ -2067,14 +1240,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
}
}
-void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
- if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
- pnv_pci_phb3_tce_invalidate_entire(phb, rm);
- else
- opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
-}
-
static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
@@ -2084,7 +1249,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
attrs);
if (!ret)
- pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
return ret;
}
@@ -2094,7 +1259,7 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
{
pnv_tce_free(tbl, index, npages);
- pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
}
static struct iommu_table_ops pnv_ioda2_iommu_ops = {
@@ -2109,178 +1274,6 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.free = pnv_pci_ioda2_table_free_pages,
};
-static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
-{
- unsigned int *weight = (unsigned int *)data;
-
- /* This is quite simplistic. The "base" weight of a device
- * is 10. 0 means no DMA is to be accounted for it.
- */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
- return 0;
-
- if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
- dev->class == PCI_CLASS_SERIAL_USB_EHCI)
- *weight += 3;
- else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
- *weight += 15;
- else
- *weight += 10;
-
- return 0;
-}
-
-static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
-{
- unsigned int weight = 0;
-
- /* SRIOV VF has same DMA32 weight as its PF */
-#ifdef CONFIG_PCI_IOV
- if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) {
- pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight);
- return weight;
- }
-#endif
-
- if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) {
- pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight);
- } else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) {
- struct pci_dev *pdev;
-
- list_for_each_entry(pdev, &pe->pbus->devices, bus_list)
- pnv_pci_ioda_dev_dma_weight(pdev, &weight);
- } else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) {
- pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight);
- }
-
- return weight;
-}
-
-static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
-{
-
- struct page *tce_mem = NULL;
- struct iommu_table *tbl;
- unsigned int weight, total_weight = 0;
- unsigned int tce32_segsz, base, segs, avail, i;
- int64_t rc;
- void *addr;
-
- /* XXX FIXME: Handle 64-bit only DMA devices */
- /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
- /* XXX FIXME: Allocate multi-level tables on PHB3 */
- weight = pnv_pci_ioda_pe_dma_weight(pe);
- if (!weight)
- return;
-
- pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
- &total_weight);
- segs = (weight * phb->ioda.dma32_count) / total_weight;
- if (!segs)
- segs = 1;
-
- /*
- * Allocate contiguous DMA32 segments. We begin with the expected
- * number of segments. With one more attempt, the number of DMA32
- * segments to be allocated is decreased by one until one segment
- * is allocated successfully.
- */
- do {
- for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
- for (avail = 0, i = base; i < base + segs; i++) {
- if (phb->ioda.dma32_segmap[i] ==
- IODA_INVALID_PE)
- avail++;
- }
-
- if (avail == segs)
- goto found;
- }
- } while (--segs);
-
- if (!segs) {
- pe_warn(pe, "No available DMA32 segments\n");
- return;
- }
-
-found:
- tbl = pnv_pci_table_alloc(phb->hose->node);
- if (WARN_ON(!tbl))
- return;
-
- iommu_register_group(&pe->table_group, phb->hose->global_number,
- pe->pe_number);
- pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
-
- /* Grab a 32-bit TCE table */
- pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
- weight, total_weight, base, segs);
- pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
- base * PNV_IODA1_DMA32_SEGSIZE,
- (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
-
- /* XXX Currently, we allocate one big contiguous table for the
- * TCEs. We only really need one chunk per 256M of TCE space
- * (ie per segment) but that's an optimization for later, it
- * requires some added smarts with our get/put_tce implementation
- *
- * Each TCE page is 4KB in size and each TCE entry occupies 8
- * bytes
- */
- tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3);
- tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
- get_order(tce32_segsz * segs));
- if (!tce_mem) {
- pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
- goto fail;
- }
- addr = page_address(tce_mem);
- memset(addr, 0, tce32_segsz * segs);
-
- /* Configure HW */
- for (i = 0; i < segs; i++) {
- rc = opal_pci_map_pe_dma_window(phb->opal_id,
- pe->pe_number,
- base + i, 1,
- __pa(addr) + tce32_segsz * i,
- tce32_segsz, IOMMU_PAGE_SIZE_4K);
- if (rc) {
- pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n",
- rc);
- goto fail;
- }
- }
-
- /* Setup DMA32 segment mapping */
- for (i = base; i < base + segs; i++)
- phb->ioda.dma32_segmap[i] = pe->pe_number;
-
- /* Setup linux iommu table */
- pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
- base * PNV_IODA1_DMA32_SEGSIZE,
- IOMMU_PAGE_SHIFT_4K);
-
- tbl->it_ops = &pnv_ioda1_iommu_ops;
- pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
- pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
- iommu_init_table(tbl, phb->hose->node, 0, 0);
-
- if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
-
- return;
- fail:
- /* XXX Failure: Try to fallback to 64-bit only ? */
- if (tce_mem)
- __free_pages(tce_mem, get_order(tce32_segsz * segs));
- if (tbl) {
- pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
- iommu_tce_table_put(tbl);
- }
-}
-
static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
int num, struct iommu_table *tbl)
{
@@ -2396,7 +1389,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
* DMA window can be larger than available memory, which will
* cause errors later.
*/
- const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER - 1);
+ const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_PAGE_ORDER);
/*
* We create the default window as big as we can. The constraint is
@@ -2435,16 +1428,17 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
}
- iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end);
- rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+ tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number;
+ if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
+ rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+ else
+ rc = -ENOMEM;
if (rc) {
- pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
- rc);
+ pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc);
iommu_tce_table_put(tbl);
- return rc;
+ tbl = NULL; /* This clears iommu_table_base below */
}
-
if (!pnv_iommu_bypass_disabled)
pnv_pci_ioda2_set_bypass(pe, true);
@@ -2459,7 +1453,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
return 0;
}
-#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
int num)
{
@@ -2483,7 +1476,6 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
return ret;
}
-#endif
#ifdef CONFIG_IOMMU_API
unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
@@ -2507,7 +1499,7 @@ unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
direct_table_size = 1UL << table_shift;
for ( ; levels; --levels) {
- bytes += _ALIGN_UP(tce_table_size, direct_table_size);
+ bytes += ALIGN(tce_table_size, direct_table_size);
tce_table_size /= direct_table_size;
tce_table_size <<= 3;
@@ -2532,13 +1524,33 @@ static long pnv_pci_ioda2_create_table_userspace(
return ret;
}
-static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
+ dev->dev.archdata.dma_offset = pe->tce_bypass_base;
+
+ if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+ }
+}
+
+static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
{
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
table_group);
/* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
struct iommu_table *tbl = pe->table_group.tables[0];
+ /*
+ * iommu_ops transfers the ownership per a device and we mode
+ * the group ownership with the first device in the group.
+ */
+ if (!tbl)
+ return 0;
+
pnv_pci_ioda2_set_bypass(pe, false);
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (pe->pbus)
@@ -2546,6 +1558,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
else if (pe->pdev)
set_iommu_table_base(&pe->pdev->dev, NULL);
iommu_tce_table_put(tbl);
+
+ return 0;
}
static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
@@ -2553,6 +1567,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
table_group);
+ /* See the comment about iommu_ops above */
+ if (pe->table_group.tables[0])
+ return;
pnv_pci_ioda2_setup_default_config(pe);
if (pe->pbus)
pnv_ioda_setup_bus_dma(pe, pe->pbus);
@@ -2566,145 +1583,13 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.take_ownership = pnv_ioda2_take_ownership,
.release_ownership = pnv_ioda2_release_ownership,
};
-
-static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
- struct iommu_table_group *table_group,
- struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- iommu_add_device(table_group, &dev->dev);
-
- if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_iommu_group_add_devices(pe,
- table_group, dev->subordinate);
- }
-}
-
-static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
- struct iommu_table_group *table_group, struct pci_bus *bus)
-{
-
- if (pe->flags & PNV_IODA_PE_DEV)
- iommu_add_device(table_group, &pe->pdev->dev);
-
- if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
- pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
- bus);
-}
-
-static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
-
-static void pnv_pci_ioda_setup_iommu_api(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- /*
- * There are 4 types of PEs:
- * - PNV_IODA_PE_BUS: a downstream port with an adapter,
- * created from pnv_pci_setup_bridge();
- * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
- * created from pnv_pci_setup_bridge();
- * - PNV_IODA_PE_VF: a SRIOV virtual function,
- * created from pnv_pcibios_sriov_enable();
- * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
- * created from pnv_pci_ioda_fixup().
- *
- * Normally a PE is represented by an IOMMU group, however for
- * devices with side channels the groups need to be more strict.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->type == PNV_PHB_NPU_NVLINK ||
- phb->type == PNV_PHB_NPU_OCAPI)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- struct iommu_table_group *table_group;
-
- table_group = pnv_try_setup_npu_table_group(pe);
- if (!table_group) {
- if (!pnv_pci_ioda_pe_dma_weight(pe))
- continue;
-
- table_group = &pe->table_group;
- iommu_register_group(&pe->table_group,
- pe->phb->hose->global_number,
- pe->pe_number);
- }
- pnv_ioda_setup_bus_iommu_group(pe, table_group,
- pe->pbus);
- }
- }
-
- /*
- * Now we have all PHBs discovered, time to add NPU devices to
- * the corresponding IOMMU groups.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- unsigned long pgsizes;
-
- phb = hose->private_data;
-
- if (phb->type != PNV_PHB_NPU_NVLINK)
- continue;
-
- pgsizes = pnv_ioda_parse_tce_sizes(phb);
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- /*
- * IODA2 bridges get this set up from
- * pci_controller_ops::setup_bridge but NPU bridges
- * do not have this hook defined so we do it here.
- */
- pe->table_group.pgsizes = pgsizes;
- pnv_npu_compound_attach(pe);
- }
- }
-}
-#else /* !CONFIG_IOMMU_API */
-static void pnv_pci_ioda_setup_iommu_api(void) { };
#endif
-static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
-{
- struct pci_controller *hose = phb->hose;
- struct device_node *dn = hose->dn;
- unsigned long mask = 0;
- int i, rc, count;
- u32 val;
-
- count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
- if (count <= 0) {
- mask = SZ_4K | SZ_64K;
- /* Add 16M for POWER8 by default */
- if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
- !cpu_has_feature(CPU_FTR_ARCH_300))
- mask |= SZ_16M | SZ_256M;
- return mask;
- }
-
- for (i = 0; i < count; i++) {
- rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
- i, &val);
- if (rc == 0)
- mask |= 1ULL << val;
- }
-
- return mask;
-}
-
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
{
int64_t rc;
- if (!pnv_pci_ioda_pe_dma_weight(pe))
- return;
-
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;
@@ -2719,39 +1604,53 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
IOMMU_TABLE_GROUP_MAX_TABLES;
pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb);
-#ifdef CONFIG_IOMMU_API
- pe->table_group.ops = &pnv_pci_ioda2_ops;
-#endif
rc = pnv_pci_ioda2_setup_default_config(pe);
if (rc)
return;
- if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+#ifdef CONFIG_IOMMU_API
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+#endif
+ pe->dma_setup_done = true;
}
-int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
+/*
+ * Called from KVM in real mode to EOI passthru interrupts. The ICP
+ * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru().
+ *
+ * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call
+ * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ
+ * numbers of the in-the-middle MSI domain are vector numbers and it's
+ * good enough for OPAL. Use that.
+ */
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d)
{
- struct pnv_phb *phb = container_of(chip, struct pnv_phb,
- ioda.irq_chip);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data);
+ struct pnv_phb *phb = hose->private_data;
- return opal_pci_msi_eoi(phb->opal_id, hw_irq);
+ return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq);
}
+/*
+ * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers
+ */
static void pnv_ioda2_msi_eoi(struct irq_data *d)
{
int64_t rc;
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
- struct irq_chip *chip = irq_data_get_irq_chip(d);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
- rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
+ rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
WARN_ON_ONCE(rc);
icp_native_eoi(d);
}
-
+/* P8/CXL only */
void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
{
struct irq_data *idata;
@@ -2773,27 +1672,32 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
}
irq_set_chip(virq, &phb->ioda.irq_chip);
+ irq_set_chip_data(virq, phb->hose);
}
+static struct irq_chip pnv_pci_msi_irq_chip;
+
/*
* Returns true iff chip is something that we could call
* pnv_opal_pci_msi_eoi for.
*/
bool is_pnv_opal_msi(struct irq_chip *chip)
{
- return chip->irq_eoi == pnv_ioda2_msi_eoi;
+ return chip == &pnv_pci_msi_irq_chip;
}
EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
-static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
- unsigned int hwirq, unsigned int virq,
- unsigned int is_64, struct msi_msg *msg)
+static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
+ unsigned int xive_num,
+ unsigned int is_64, struct msi_msg *msg)
{
struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
- unsigned int xive_num = hwirq - phb->msi_base;
__be32 data;
int rc;
+ dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__,
+ is_64 ? "64" : "32", xive_num);
+
/* No PE assigned ? bail out ... no MSI for you ! */
if (pe == NULL)
return -ENXIO;
@@ -2841,17 +1745,214 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
}
msg->data = be32_to_cpu(data);
- pnv_set_msi_irq_chip(phb, virq);
+ return 0;
+}
+
+/*
+ * The msi_free() op is called before irq_domain_free_irqs_top() when
+ * the handler data is still available. Use that to clear the XIVE
+ * controller.
+ */
+static void pnv_msi_ops_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info,
+ unsigned int irq)
+{
+ if (xive_enabled())
+ xive_irq_free_data(irq);
+}
- pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
- " address=%x_%08x data=%x PE# %x\n",
- pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
- msg->address_hi, msg->address_lo, data, pe->pe_number);
+static struct msi_domain_ops pnv_pci_msi_domain_ops = {
+ .msi_free = pnv_msi_ops_msi_free,
+};
+
+static void pnv_msi_shutdown(struct irq_data *d)
+{
+ d = d->parent_data;
+ if (d->chip->irq_shutdown)
+ d->chip->irq_shutdown(d);
+}
+
+static void pnv_msi_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+static void pnv_msi_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip pnv_pci_msi_irq_chip = {
+ .name = "PNV-PCI-MSI",
+ .irq_shutdown = pnv_msi_shutdown,
+ .irq_mask = pnv_msi_mask,
+ .irq_unmask = pnv_msi_unmask,
+ .irq_eoi = irq_chip_eoi_parent,
+};
+
+static struct msi_domain_info pnv_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ .ops = &pnv_pci_msi_domain_ops,
+ .chip = &pnv_pci_msi_irq_chip,
+};
+
+static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ struct msi_desc *entry = irq_data_get_msi_desc(d);
+ struct pci_dev *pdev = msi_desc_to_pci_dev(entry);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+ int rc;
+
+ rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq,
+ entry->pci.msi_attrib.is_64, msg);
+ if (rc)
+ dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n",
+ entry->pci.msi_attrib.is_64 ? "64" : "32", d->hwirq, rc);
+}
+
+/*
+ * The IRQ data is mapped in the MSI domain in which HW IRQ numbers
+ * correspond to vector numbers.
+ */
+static void pnv_msi_eoi(struct irq_data *d)
+{
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_PHB3) {
+ /*
+ * The EOI OPAL call takes an OPAL HW IRQ number but
+ * since it is translated into a vector number in
+ * OPAL, use that directly.
+ */
+ WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq));
+ }
+
+ irq_chip_eoi_parent(d);
+}
+
+static struct irq_chip pnv_msi_irq_chip = {
+ .name = "PNV-MSI",
+ .irq_shutdown = pnv_msi_shutdown,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = pnv_msi_eoi,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pnv_msi_compose_msg,
+};
+
+static int pnv_irq_parent_domain_alloc(struct irq_domain *domain,
+ unsigned int virq, int hwirq)
+{
+ struct irq_fwspec parent_fwspec;
+ int ret;
+
+ parent_fwspec.fwnode = domain->parent->fwnode;
+ parent_fwspec.param_count = 2;
+ parent_fwspec.param[0] = hwirq;
+ parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct pci_controller *hose = domain->host_data;
+ struct pnv_phb *phb = hose->private_data;
+ msi_alloc_info_t *info = arg;
+ struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc);
+ int hwirq;
+ int i, ret;
+
+ hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs);
+ if (hwirq < 0) {
+ dev_warn(&pdev->dev, "failed to find a free MSI\n");
+ return -ENOSPC;
+ }
+
+ dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+ hose->dn, virq, hwirq, nr_irqs);
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = pnv_irq_parent_domain_alloc(domain, virq + i,
+ phb->msi_base + hwirq + i);
+ if (ret)
+ goto out;
+
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &pnv_msi_irq_chip, hose);
+ }
return 0;
+
+out:
+ irq_domain_free_irqs_parent(domain, virq, i - 1);
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs);
+ return ret;
}
-static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
+static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn,
+ virq, d->hwirq, nr_irqs);
+
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs);
+ /* XIVE domain is cleared through ->msi_free() */
+}
+
+static const struct irq_domain_ops pnv_irq_domain_ops = {
+ .alloc = pnv_irq_domain_alloc,
+ .free = pnv_irq_domain_free,
+};
+
+static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
+{
+ struct pnv_phb *phb = hose->private_data;
+ struct irq_domain *parent = irq_get_default_host();
+
+ hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id);
+ if (!hose->fwnode)
+ return -ENOMEM;
+
+ hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+ hose->fwnode,
+ &pnv_irq_domain_ops, hose);
+ if (!hose->dev_domain) {
+ pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
+ irq_domain_free_fwnode(hose->fwnode);
+ return -ENOMEM;
+ }
+
+ hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn),
+ &pnv_msi_domain_info,
+ hose->dev_domain);
+ if (!hose->msi_domain) {
+ pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
+ irq_domain_free_fwnode(hose->fwnode);
+ irq_domain_remove(hose->dev_domain);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
unsigned int count;
const __be32 *prop = of_get_property(phb->hose->dn,
@@ -2871,102 +1972,11 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
return;
}
- phb->msi_setup = pnv_pci_ioda_msi_setup;
- phb->msi32_support = 1;
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
count, phb->msi_base);
-}
-
-#ifdef CONFIG_PCI_IOV
-static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- const resource_size_t gate = phb->ioda.m64_segsize >> 2;
- struct resource *res;
- int i;
- resource_size_t size, total_vf_bar_sz;
- struct pci_dn *pdn;
- int mul, total_vfs;
- if (!pdev->is_physfn || pci_dev_is_added(pdev))
- return;
-
- pdn = pci_get_pdn(pdev);
- pdn->vfs_expanded = 0;
- pdn->m64_single_mode = false;
-
- total_vfs = pci_sriov_get_totalvfs(pdev);
- mul = phb->ioda.total_pe_num;
- total_vf_bar_sz = 0;
-
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- res = &pdev->resource[i + PCI_IOV_RESOURCES];
- if (!res->flags || res->parent)
- continue;
- if (!pnv_pci_is_m64_flags(res->flags)) {
- dev_warn(&pdev->dev, "Don't support SR-IOV with"
- " non M64 VF BAR%d: %pR. \n",
- i, res);
- goto truncate_iov;
- }
-
- total_vf_bar_sz += pci_iov_resource_size(pdev,
- i + PCI_IOV_RESOURCES);
-
- /*
- * If bigger than quarter of M64 segment size, just round up
- * power of two.
- *
- * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
- * with other devices, IOV BAR size is expanded to be
- * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64
- * segment size , the expanded size would equal to half of the
- * whole M64 space size, which will exhaust the M64 Space and
- * limit the system flexibility. This is a design decision to
- * set the boundary to quarter of the M64 segment size.
- */
- if (total_vf_bar_sz > gate) {
- mul = roundup_pow_of_two(total_vfs);
- dev_info(&pdev->dev,
- "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
- total_vf_bar_sz, gate, mul);
- pdn->m64_single_mode = true;
- break;
- }
- }
-
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- res = &pdev->resource[i + PCI_IOV_RESOURCES];
- if (!res->flags || res->parent)
- continue;
-
- size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
- /*
- * On PHB3, the minimum size alignment of M64 BAR in single
- * mode is 32MB.
- */
- if (pdn->m64_single_mode && (size < SZ_32M))
- goto truncate_iov;
- dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
- res->end = res->start + size * mul - 1;
- dev_dbg(&pdev->dev, " %pR\n", res);
- dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
- i, res, mul);
- }
- pdn->vfs_expanded = mul;
-
- return;
-
-truncate_iov:
- /* To save MMIO space, IOV BAR is truncated. */
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- res = &pdev->resource[i + PCI_IOV_RESOURCES];
- res->flags = 0;
- res->end = res->start - 1;
- }
+ pnv_msi_allocate_domains(phb->hose, count);
}
-#endif /* CONFIG_PCI_IOV */
static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
struct resource *res)
@@ -2976,7 +1986,8 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
int index;
int64_t rc;
- if (!res || !res->flags || res->start > res->end)
+ if (!res || !res->flags || res->start > res->end ||
+ res->flags & IORESOURCE_UNSET)
return;
if (res->flags & IORESOURCE_IO) {
@@ -3027,7 +2038,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
/*
* This function is supposed to be called on basis of PE from top
- * to bottom style. So the the I/O or MMIO segment assigned to
+ * to bottom style. So the I/O or MMIO segment assigned to
* parent PE could be overridden by its child PEs if necessary.
*/
static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
@@ -3062,19 +2073,9 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
#ifdef CONFIG_DEBUG_FS
static int pnv_pci_diag_data_set(void *data, u64 val)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
+ struct pnv_phb *phb = data;
s64 ret;
- if (val != 1ULL)
- return -EINVAL;
-
- hose = (struct pci_controller *)data;
- if (!hose || !hose->private_data)
- return -ENODEV;
-
- phb = hose->private_data;
-
/* Retrieve the diag data from firmware */
ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
phb->diag_data_size);
@@ -3089,6 +2090,33 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
"%llu\n");
+static int pnv_pci_ioda_pe_dump(void *data, u64 val)
+{
+ struct pnv_phb *phb = data;
+ int pe_num;
+
+ for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
+ struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num];
+
+ if (!test_bit(pe_num, phb->ioda.pe_alloc))
+ continue;
+
+ pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n",
+ pe->rid, pe->device_count,
+ (pe->flags & PNV_IODA_PE_DEV) ? "dev " : "",
+ (pe->flags & PNV_IODA_PE_BUS) ? "bus " : "",
+ (pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "",
+ (pe->flags & PNV_IODA_PE_MASTER) ? "master " : "",
+ (pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "",
+ (pe->flags & PNV_IODA_PE_VF) ? "vf " : "");
+ }
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL,
+ pnv_pci_ioda_pe_dump, "%llu\n");
+
#endif /* CONFIG_DEBUG_FS */
static void pnv_pci_ioda_create_dbgfs(void)
@@ -3101,19 +2129,13 @@ static void pnv_pci_ioda_create_dbgfs(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- /* Notify initialization of PHB done */
- phb->initialized = 1;
-
sprintf(name, "PCI%04x", hose->global_number);
- phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
- if (!phb->dbgfs) {
- pr_warn("%s: Error on creating debugfs on PHB#%x\n",
- __func__, hose->global_number);
- continue;
- }
+ phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir);
debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
- hose, &pnv_pci_diag_data_fops);
+ phb, &pnv_pci_diag_data_fops);
+ debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs,
+ phb, &pnv_pci_ioda_pe_dump_fops);
}
#endif /* CONFIG_DEBUG_FS */
}
@@ -3155,8 +2177,6 @@ static void pnv_pci_enable_bridges(void)
static void pnv_pci_ioda_fixup(void)
{
- pnv_pci_ioda_setup_PEs();
- pnv_pci_ioda_setup_iommu_api();
pnv_pci_ioda_create_dbgfs();
pnv_pci_enable_bridges();
@@ -3181,10 +2201,9 @@ static void pnv_pci_ioda_fixup(void)
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
unsigned long type)
{
- struct pci_dev *bridge;
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
int num_pci_bridges = 0;
+ struct pci_dev *bridge;
bridge = bus->self;
while (bridge) {
@@ -3268,28 +2287,16 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
}
}
-static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
+static void pnv_pci_configure_bus(struct pci_bus *bus)
{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pnv_phb *phb = hose->private_data;
struct pci_dev *bridge = bus->self;
struct pnv_ioda_pe *pe;
- bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
+ bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
- /* Extend bridge's windows if necessary */
- pnv_pci_fixup_bridge_resources(bus, type);
-
- /* The PE for root bus should be realized before any one else */
- if (!phb->ioda.root_pe_populated) {
- pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);
- if (pe) {
- phb->ioda.root_pe_idx = pe->pe_number;
- phb->ioda.root_pe_populated = true;
- }
- }
+ dev_info(&bus->dev, "Configuring PE for bus\n");
/* Don't assign PE to PCI bus, which doesn't have subordinate devices */
- if (list_empty(&bus->devices))
+ if (WARN_ON(list_empty(&bus->devices)))
return;
/* Reserve PEs according to used M64 resources */
@@ -3305,17 +2312,6 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
return;
pnv_ioda_setup_pe_seg(pe);
- switch (phb->type) {
- case PNV_PHB_IODA1:
- pnv_pci_ioda1_setup_dma_pe(phb, pe);
- break;
- case PNV_PHB_IODA2:
- pnv_pci_ioda2_setup_dma_pe(phb, pe);
- break;
- default:
- pr_warn("%s: No DMA for PHB#%x (type %d)\n",
- __func__, phb->hose->global_number, phb->type);
- }
}
static resource_size_t pnv_pci_default_alignment(void)
@@ -3323,134 +2319,50 @@ static resource_size_t pnv_pci_default_alignment(void)
return PAGE_SIZE;
}
-#ifdef CONFIG_PCI_IOV
-static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
- int resno)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(pdev);
- resource_size_t align;
-
- /*
- * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
- * SR-IOV. While from hardware perspective, the range mapped by M64
- * BAR should be size aligned.
- *
- * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
- * powernv-specific hardware restriction is gone. But if just use the
- * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
- * in one segment of M64 #15, which introduces the PE conflict between
- * PF and VF. Based on this, the minimum alignment of an IOV BAR is
- * m64_segsize.
- *
- * This function returns the total IOV BAR size if M64 BAR is in
- * Shared PE mode or just VF BAR size if not.
- * If the M64 BAR is in Single PE mode, return the VF BAR size or
- * M64 segment size if IOV BAR size is less.
- */
- align = pci_iov_resource_size(pdev, resno);
- if (!pdn->vfs_expanded)
- return align;
- if (pdn->m64_single_mode)
- return max(align, (resource_size_t)phb->ioda.m64_segsize);
-
- return pdn->vfs_expanded * align;
-}
-#endif /* CONFIG_PCI_IOV */
-
/* Prevent enabling devices for which we couldn't properly
* assign a PE
*/
static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn;
- /* The function is probably called while the PEs have
- * not be created yet. For example, resource reassignment
- * during PCI probe period. We just skip the check if
- * PEs isn't ready.
- */
- if (!phb->initialized)
- return true;
-
pdn = pci_get_pdn(dev);
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE) {
+ pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n");
return false;
-
- return true;
-}
-
-static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
- int num)
-{
- struct pnv_ioda_pe *pe = container_of(table_group,
- struct pnv_ioda_pe, table_group);
- struct pnv_phb *phb = pe->phb;
- unsigned int idx;
- long rc;
-
- pe_info(pe, "Removing DMA window #%d\n", num);
- for (idx = 0; idx < phb->ioda.dma32_count; idx++) {
- if (phb->ioda.dma32_segmap[idx] != pe->pe_number)
- continue;
-
- rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
- idx, 0, 0ul, 0ul, 0ul);
- if (rc != OPAL_SUCCESS) {
- pe_warn(pe, "Failure %ld unmapping DMA32 segment#%d\n",
- rc, idx);
- return rc;
- }
-
- phb->ioda.dma32_segmap[idx] = IODA_INVALID_PE;
}
- pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
- return OPAL_SUCCESS;
+ return true;
}
-static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
+static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
{
- unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
- struct iommu_table *tbl = pe->table_group.tables[0];
- int64_t rc;
-
- if (!weight)
- return;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *pe;
- rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
- if (rc != OPAL_SUCCESS)
- return;
+ pdn = pci_get_pdn(dev);
+ if (!pdn)
+ return false;
- pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false);
- if (pe->table_group.group) {
- iommu_group_put(pe->table_group.group);
- WARN_ON(pe->table_group.group);
+ if (pdn->pe_number == IODA_INVALID_PE) {
+ pe = pnv_ioda_setup_dev_PE(dev);
+ if (!pe)
+ return false;
}
-
- free_pages(tbl->it_base, get_order(tbl->it_size << 3));
- iommu_tce_table_put(tbl);
+ return true;
}
-static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
{
struct iommu_table *tbl = pe->table_group.tables[0];
- unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
-#ifdef CONFIG_IOMMU_API
int64_t rc;
-#endif
- if (!weight)
+ if (!pe->dma_setup_done)
return;
-#ifdef CONFIG_IOMMU_API
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
-#endif
pnv_pci_ioda2_set_bypass(pe, false);
if (pe->table_group.group) {
@@ -3473,14 +2385,8 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
if (map[idx] != pe->pe_number)
continue;
- if (win == OPAL_M64_WINDOW_TYPE)
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- phb->ioda.reserved_pe_idx, win,
- idx / PNV_IODA1_M64_SEGS,
- idx % PNV_IODA1_M64_SEGS);
- else
- rc = opal_pci_map_pe_mmio_window(phb->opal_id,
- phb->ioda.reserved_pe_idx, win, 0, idx);
+ rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+ phb->ioda.reserved_pe_idx, win, 0, idx);
if (rc != OPAL_SUCCESS)
pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
@@ -3494,14 +2400,7 @@ static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
{
struct pnv_phb *phb = pe->phb;
- if (phb->type == PNV_PHB_IODA1) {
- pnv_ioda_free_pe_seg(pe, OPAL_IO_WINDOW_TYPE,
- phb->ioda.io_segmap);
- pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
- phb->ioda.m32_segmap);
- pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE,
- phb->ioda.m64_segmap);
- } else if (phb->type == PNV_PHB_IODA2) {
+ if (phb->type == PNV_PHB_IODA2) {
pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
phb->ioda.m32_segmap);
}
@@ -3512,14 +2411,18 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
struct pnv_phb *phb = pe->phb;
struct pnv_ioda_pe *slave, *tmp;
+ pe_info(pe, "Releasing PE\n");
+
+ mutex_lock(&phb->ioda.pe_list_mutex);
list_del(&pe->list);
+ mutex_unlock(&phb->ioda.pe_list_mutex);
+
switch (phb->type) {
- case PNV_PHB_IODA1:
- pnv_pci_ioda1_release_pe_dma(pe);
- break;
case PNV_PHB_IODA2:
pnv_pci_ioda2_release_pe_dma(pe);
break;
+ case PNV_PHB_NPU_OCAPI:
+ break;
default:
WARN_ON(1);
}
@@ -3541,26 +2444,35 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
* that it can be populated again in PCI hot add path. The PE
* shouldn't be destroyed as it's the global reserved resource.
*/
- if (phb->ioda.root_pe_populated &&
- phb->ioda.root_pe_idx == pe->pe_number)
- phb->ioda.root_pe_populated = false;
- else
- pnv_ioda_free_pe(pe);
+ if (phb->ioda.root_pe_idx == pe->pe_number)
+ return;
+
+ pnv_ioda_free_pe(pe);
}
static void pnv_pci_release_device(struct pci_dev *pdev)
{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
+ /* The VF PE state is torn down when sriov_disable() is called */
if (pdev->is_virtfn)
return;
if (!pdn || pdn->pe_number == IODA_INVALID_PE)
return;
+#ifdef CONFIG_PCI_IOV
+ /*
+ * FIXME: Try move this to sriov_disable(). It's here since we allocate
+ * the iov state at probe time since we need to fiddle with the IOV
+ * resources.
+ */
+ if (pdev->is_physfn)
+ kfree(pdev->dev.archdata.iov_data);
+#endif
+
/*
* PCI hotplug can happen as part of EEH error recovery. The @pdn
* isn't removed and added afterwards in this scenario. We should
@@ -3577,50 +2489,72 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
pnv_ioda_release_pe(pe);
}
-static void pnv_npu_disable_device(struct pci_dev *pdev)
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
+{
+ struct pnv_phb *phb = hose->private_data;
+
+ opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
+ OPAL_ASSERT_RESET);
+}
+
+static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
{
- struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
- struct eeh_pe *eehpe = edev ? edev->pe : NULL;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+ struct pnv_ioda_pe *pe;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+ continue;
+
+ if (!pe->pbus)
+ continue;
- if (eehpe && eeh_ops && eeh_ops->reset)
- eeh_ops->reset(eehpe, EEH_RESET_HOT);
+ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+ pe->pbus = bus;
+ break;
+ }
+ }
}
-static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
+#ifdef CONFIG_IOMMU_API
+static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
+ struct pci_dev *pdev)
{
struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
- opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
- OPAL_ASSERT_RESET);
+ if (WARN_ON(!phb))
+ return ERR_PTR(-ENODEV);
+
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+ if (!pe)
+ return ERR_PTR(-ENODEV);
+
+ if (!pe->table_group.group)
+ return ERR_PTR(-ENODEV);
+
+ return iommu_group_ref_get(pe->table_group.group);
}
+#endif
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .dma_bus_setup = pnv_pci_dma_bus_setup,
+ .dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
.iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
.release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
- .setup_bridge = pnv_pci_setup_bridge,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .shutdown = pnv_pci_ioda_shutdown,
-};
-
-static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
+ .setup_bridge = pnv_pci_fixup_bridge_resources,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.shutdown = pnv_pci_ioda_shutdown,
- .disable_device = pnv_npu_disable_device,
+#ifdef CONFIG_IOMMU_API
+ .device_group = pnv_pci_device_group,
+#endif
};
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
- .enable_device_hook = pnv_pci_enable_device_hook,
+ .enable_device_hook = pnv_ocapi_enable_device_hook,
+ .release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.shutdown = pnv_pci_ioda_shutdown,
@@ -3632,7 +2566,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
struct pci_controller *hose;
struct pnv_phb *phb;
unsigned long size, m64map_off, m32map_off, pemap_off;
- unsigned long iomap_off = 0, dma32map_off = 0;
+ struct pnv_ioda_pe *root_pe;
struct resource r;
const __be64 *prop64;
const __be32 *prop32;
@@ -3655,7 +2589,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb_id = be64_to_cpup(prop64);
pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
- phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES);
+ phb = kzalloc(sizeof(*phb), GFP_KERNEL);
if (!phb)
panic("%s: Failed to allocate %zu bytes\n", __func__,
sizeof(*phb));
@@ -3665,7 +2599,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (!phb->hose) {
pr_err(" Can't allocate PCI controller for %pOF\n",
np);
- memblock_free(__pa(phb), sizeof(struct pnv_phb));
+ memblock_free(phb, sizeof(struct pnv_phb));
return;
}
@@ -3690,10 +2624,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->model = PNV_PHB_MODEL_P7IOC;
else if (of_device_is_compatible(np, "ibm,power8-pciex"))
phb->model = PNV_PHB_MODEL_PHB3;
- else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
- phb->model = PNV_PHB_MODEL_NPU;
- else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
- phb->model = PNV_PHB_MODEL_NPU2;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
@@ -3704,7 +2634,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
else
phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
- phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES);
+ phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL);
if (!phb->diag_data)
panic("%s: Failed to allocate %u bytes\n", __func__,
phb->diag_data_size);
@@ -3746,29 +2676,19 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
- /* Calculate how many 32-bit TCE segments we have */
- phb->ioda.dma32_count = phb->ioda.m32_pci_base /
- PNV_IODA1_DMA32_SEGSIZE;
-
/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
- size = _ALIGN_UP(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+ size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
sizeof(unsigned long));
m64map_off = size;
size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
m32map_off = size;
size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
- if (phb->type == PNV_PHB_IODA1) {
- iomap_off = size;
- size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
- dma32map_off = size;
- size += phb->ioda.dma32_count *
- sizeof(phb->ioda.dma32_segmap[0]);
- }
pemap_off = size;
size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
- aux = memblock_alloc(size, SMP_CACHE_BYTES);
+ aux = kzalloc(size, GFP_KERNEL);
if (!aux)
panic("%s: Failed to allocate %lu bytes\n", __func__, size);
+
phb->ioda.pe_alloc = aux;
phb->ioda.m64_segmap = aux + m64map_off;
phb->ioda.m32_segmap = aux + m32map_off;
@@ -3776,15 +2696,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
}
- if (phb->type == PNV_PHB_IODA1) {
- phb->ioda.io_segmap = aux + iomap_off;
- for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
- phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
-
- phb->ioda.dma32_segmap = aux + dma32map_off;
- for (segno = 0; segno < phb->ioda.dma32_count; segno++)
- phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
- }
phb->ioda.pe_array = aux + pemap_off;
/*
@@ -3800,16 +2711,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1;
pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
} else {
- phb->ioda.root_pe_idx = IODA_INVALID_PE;
+ /* otherwise just allocate one */
+ root_pe = pnv_ioda_alloc_pe(phb, 1);
+ phb->ioda.root_pe_idx = root_pe->pe_number;
}
INIT_LIST_HEAD(&phb->ioda.pe_list);
mutex_init(&phb->ioda.pe_list_mutex);
- /* Calculate how many 32-bit TCE segments we have */
- phb->ioda.dma32_count = phb->ioda.m32_pci_base /
- PNV_IODA1_DMA32_SEGSIZE;
-
#if 0 /* We should really do that ... */
rc = opal_pci_set_phb_mem_window(opal->phb_id,
window_type,
@@ -3848,21 +2757,17 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
switch (phb->type) {
- case PNV_PHB_NPU_NVLINK:
- hose->controller_ops = pnv_npu_ioda_controller_ops;
- break;
case PNV_PHB_NPU_OCAPI:
hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
break;
default:
- phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
}
ppc_md.pcibios_default_alignment = pnv_pci_default_alignment;
#ifdef CONFIG_PCI_IOV
- ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
+ ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov;
ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
@@ -3894,6 +2799,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Remove M64 resource if we can't configure it successfully */
if (!phb->init_m64 || phb->init_m64(phb))
hose->mem_resources[1].flags = 0;
+
+ /* create pci_dn's for DT nodes under this PHB */
+ pci_devs_phb_init_dynamic(hose);
}
void __init pnv_pci_init_ioda2_phb(struct device_node *np)
@@ -3901,11 +2809,6 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}
-void __init pnv_pci_init_npu_phb(struct device_node *np)
-{
- pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
-}
-
void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
{
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
@@ -3913,8 +2816,7 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- struct pnv_phb *phb = hose->private_data;
+ struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
if (!machine_is(powernv))
return;
@@ -3923,27 +2825,3 @@ static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
}
DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
-
-void __init pnv_pci_init_ioda_hub(struct device_node *np)
-{
- struct device_node *phbn;
- const __be64 *prop64;
- u64 hub_id;
-
- pr_info("Probing IODA IO-Hub %pOF\n", np);
-
- prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
- if (!prop64) {
- pr_err(" Missing \"ibm,opal-hubid\" property !\n");
- return;
- }
- hub_id = be64_to_cpup(prop64);
- pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
-
- /* Count child PHBs */
- for_each_child_of_node(np, phbn) {
- /* Look for IODA1 PHBs */
- if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
- pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
- }
-}