summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWei Yang <weiyang@linux.vnet.ibm.com>2015-03-25 16:23:57 +0800
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2015-03-31 13:02:38 +1100
commit781a868f3136c6eb8e8c5c19d148416d7da86610 (patch)
tree941341c0c52f2669b73b5ec619aeff5e31826b4e
parent5350ab3fd794f899079d9f6b2b6fe1a7917087ef (diff)
powerpc/powernv: Shift VF resource with an offset
On PowerNV platform, resource position in M64 BAR implies the PE# the resource belongs to. In some cases, adjustment of a resource is necessary to locate it to a correct position in M64 BAR . This patch adds pnv_pci_vf_resource_shift() to shift the 'real' PF IOV BAR address according to an offset. Note: After doing so, there would be a "hole" in the /proc/iomem when offset is a positive value. It looks like the device return some mmio back to the system, which actually no one could use it. [bhelgaas: rework loops, rework overlap check, index resource[] conventionally, remove pci_regs.h include, squashed with next patch] Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h4
-rw-r--r--arch/powerpc/kernel/pci_dn.c13
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c528
-rw-r--r--arch/powerpc/platforms/powernv/pci.c18
-rw-r--r--arch/powerpc/platforms/powernv/pci.h7
5 files changed, 553 insertions, 17 deletions
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 7b8ebc5929ff..8716db48e946 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -180,6 +180,10 @@ struct pci_dn {
int pe_number;
#ifdef CONFIG_PCI_IOV
u16 vfs_expanded; /* number of VFs IOV BAR expanded */
+ u16 num_vfs; /* number of VFs enabled*/
+ int offset; /* PE# for the first VF PE */
+#define IODA_INVALID_M64 (-1)
+ int m64_wins[PCI_SRIOV_NUM_BARS];
#endif /* CONFIG_PCI_IOV */
#endif
struct list_head child_list;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index e5f1d78ef7cf..b3b4df91b792 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -217,6 +217,19 @@ void remove_dev_pci_data(struct pci_dev *pdev)
struct pci_dn *pdn, *tmp;
int i;
+ /*
+ * VF and VF PE are created/released dynamically, so we need to
+ * bind/unbind them. Otherwise the VF and VF PE would be mismatched
+ * when re-enabling SR-IOV.
+ */
+ if (pdev->is_virtfn) {
+ pdn = pci_get_pdn(pdev);
+#ifdef CONFIG_PPC_POWERNV
+ pdn->pe_number = IODA_INVALID_PE;
+#endif
+ return;
+ }
+
/* Only support IOV PF for now */
if (!pdev->is_physfn)
return;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 217eaad23cde..5187d164cfe1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -44,6 +44,9 @@
#include "powernv.h"
#include "pci.h"
+/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
+#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
+
static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
@@ -56,11 +59,18 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
vaf.fmt = fmt;
vaf.va = &args;
- if (pe->pdev)
+ if (pe->flags & PNV_IODA_PE_DEV)
strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
- else
+ else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
sprintf(pfix, "%04x:%02x ",
pci_domain_nr(pe->pbus), pe->pbus->number);
+#ifdef CONFIG_PCI_IOV
+ else if (pe->flags & PNV_IODA_PE_VF)
+ sprintf(pfix, "%04x:%02x:%2x.%d",
+ pci_domain_nr(pe->parent_dev->bus),
+ (pe->rid & 0xff00) >> 8,
+ PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
+#endif /* CONFIG_PCI_IOV*/
printk("%spci %s: [PE# %.3d] %pV",
level, pfix, pe->pe_number, &vaf);
@@ -591,7 +601,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
bool is_add)
{
struct pnv_ioda_pe *slave;
- struct pci_dev *pdev;
+ struct pci_dev *pdev = NULL;
int ret;
/*
@@ -630,8 +640,12 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
pdev = pe->pbus->self;
- else
+ else if (pe->flags & PNV_IODA_PE_DEV)
pdev = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+ else if (pe->flags & PNV_IODA_PE_VF)
+ pdev = pe->parent_dev->bus->self;
+#endif /* CONFIG_PCI_IOV */
while (pdev) {
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *parent;
@@ -649,6 +663,87 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
return 0;
}
+#ifdef CONFIG_PCI_IOV
+static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+ struct pci_dev *parent;
+ uint8_t bcomp, dcomp, fcomp;
+ int64_t rc;
+ long rid_end, rid;
+
+ /* Currently, we just deconfigure VF PE. Bus PE will always there.*/
+ if (pe->pbus) {
+ int count;
+
+ dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+ fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+ parent = pe->pbus->self;
+ if (pe->flags & PNV_IODA_PE_BUS_ALL)
+ count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
+ else
+ count = 1;
+
+ switch(count) {
+ case 1: bcomp = OpalPciBusAll; break;
+ case 2: bcomp = OpalPciBus7Bits; break;
+ case 4: bcomp = OpalPciBus6Bits; break;
+ case 8: bcomp = OpalPciBus5Bits; break;
+ case 16: bcomp = OpalPciBus4Bits; break;
+ case 32: bcomp = OpalPciBus3Bits; break;
+ default:
+ dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+ count);
+ /* Do an exact match only */
+ bcomp = OpalPciBusAll;
+ }
+ rid_end = pe->rid + (count << 8);
+ } else {
+ if (pe->flags & PNV_IODA_PE_VF)
+ parent = pe->parent_dev;
+ else
+ parent = pe->pdev->bus->self;
+ bcomp = OpalPciBusAll;
+ dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+ fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+ rid_end = pe->rid + 1;
+ }
+
+ /* Clear the reverse map */
+ for (rid = pe->rid; rid < rid_end; rid++)
+ phb->ioda.pe_rmap[rid] = 0;
+
+ /* Release from all parents PELT-V */
+ while (parent) {
+ struct pci_dn *pdn = pci_get_pdn(parent);
+ if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+ rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+ pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+ /* XXX What to do in case of error ? */
+ }
+ parent = parent->bus->self;
+ }
+
+ opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+ /* Disassociate PE in PELT */
+ rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+ pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+ if (rc)
+ pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
+ rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+ bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
+ if (rc)
+ pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+
+ pe->pbus = NULL;
+ pe->pdev = NULL;
+ pe->parent_dev = NULL;
+
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
struct pci_dev *parent;
@@ -675,15 +770,19 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
case 16: bcomp = OpalPciBus4Bits; break;
case 32: bcomp = OpalPciBus3Bits; break;
default:
- pr_err("%s: Number of subordinate busses %d"
- " unsupported\n",
- pci_name(pe->pbus->self), count);
+ dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+ count);
/* Do an exact match only */
bcomp = OpalPciBusAll;
}
rid_end = pe->rid + (count << 8);
} else {
- parent = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+ if (pe->flags & PNV_IODA_PE_VF)
+ parent = pe->parent_dev;
+ else
+#endif /* CONFIG_PCI_IOV */
+ parent = pe->pdev->bus->self;
bcomp = OpalPciBusAll;
dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
@@ -774,6 +873,78 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
return 10;
}
+#ifdef CONFIG_PCI_IOV
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+ struct pci_dn *pdn = pci_get_pdn(dev);
+ int i;
+ struct resource *res, res2;
+ resource_size_t size;
+ u16 num_vfs;
+
+ if (!dev->is_physfn)
+ return -EINVAL;
+
+ /*
+ * "offset" is in VFs. The M64 windows are sized so that when they
+ * are segmented, each segment is the same size as the IOV BAR.
+ * Each segment is in a separate PE, and the high order bits of the
+ * address are the PE number. Therefore, each VF's BAR is in a
+ * separate PE, and changing the IOV BAR start address changes the
+ * range of PEs the VFs are in.
+ */
+ num_vfs = pdn->num_vfs;
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &dev->resource[i + PCI_IOV_RESOURCES];
+ if (!res->flags || !res->parent)
+ continue;
+
+ if (!pnv_pci_is_mem_pref_64(res->flags))
+ continue;
+
+ /*
+ * The actual IOV BAR range is determined by the start address
+ * and the actual size for num_vfs VFs BAR. This check is to
+ * make sure that after shifting, the range will not overlap
+ * with another device.
+ */
+ size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+ res2.flags = res->flags;
+ res2.start = res->start + (size * offset);
+ res2.end = res2.start + (size * num_vfs) - 1;
+
+ if (res2.end > res->end) {
+ dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+ i, &res2, res, num_vfs, offset);
+ return -EBUSY;
+ }
+ }
+
+ /*
+ * After doing so, there would be a "hole" in the /proc/iomem when
+ * offset is a positive value. It looks like the device return some
+ * mmio back to the system, which actually no one could use it.
+ */
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &dev->resource[i + PCI_IOV_RESOURCES];
+ if (!res->flags || !res->parent)
+ continue;
+
+ if (!pnv_pci_is_mem_pref_64(res->flags))
+ continue;
+
+ size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+ res2 = *res;
+ res->start += size * offset;
+
+ dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n",
+ i, &res2, res, num_vfs, offset);
+ pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+ }
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
#if 0
static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
@@ -979,8 +1150,316 @@ static void pnv_pci_ioda_setup_PEs(void)
}
#ifdef CONFIG_PCI_IOV
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ int i;
+
+ bus = pdev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+ pdn = pci_get_pdn(pdev);
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ if (pdn->m64_wins[i] == IODA_INVALID_M64)
+ continue;
+ opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 0);
+ clear_bit(pdn->m64_wins[i], &phb->ioda.m64_bar_alloc);
+ pdn->m64_wins[i] = IODA_INVALID_M64;
+ }
+
+ return 0;
+}
+
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ unsigned int win;
+ struct resource *res;
+ int i;
+ int64_t rc;
+
+ bus = pdev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+ pdn = pci_get_pdn(pdev);
+
+ /* Initialize the m64_wins to IODA_INVALID_M64 */
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+ pdn->m64_wins[i] = IODA_INVALID_M64;
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ res = &pdev->resource[i + PCI_IOV_RESOURCES];
+ if (!res->flags || !res->parent)
+ continue;
+
+ if (!pnv_pci_is_mem_pref_64(res->flags))
+ continue;
+
+ do {
+ win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+ phb->ioda.m64_bar_idx + 1, 0);
+
+ if (win >= phb->ioda.m64_bar_idx + 1)
+ goto m64_failed;
+ } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+ pdn->m64_wins[i] = win;
+
+ /* Map the M64 here */
+ rc = opal_pci_set_phb_mem_window(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ pdn->m64_wins[i],
+ res->start,
+ 0, /* unused */
+ resource_size(res));
+ if (rc != OPAL_SUCCESS) {
+ dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
+ win, rc);
+ goto m64_failed;
+ }
+
+ rc = opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 1);
+ if (rc != OPAL_SUCCESS) {
+ dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
+ win, rc);
+ goto m64_failed;
+ }
+ }
+ return 0;
+
+m64_failed:
+ pnv_pci_vf_release_m64(pdev);
+ return -EBUSY;
+}
+
+static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct iommu_table *tbl;
+ unsigned long addr;
+ int64_t rc;
+
+ bus = dev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+ tbl = pe->tce32_table;
+ addr = tbl->it_base;
+
+ opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+ pe->pe_number << 1, 1, __pa(addr),
+ 0, 0x1000);
+
+ rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+ pe->pe_number,
+ (pe->pe_number << 1) + 1,
+ pe->tce_bypass_base,
+ 0);
+ if (rc)
+ pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+
+ iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
+ free_pages(addr, get_order(TCE32_TABLE_SIZE));
+ pe->tce32_table = NULL;
+}
+
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe, *pe_n;
+ struct pci_dn *pdn;
+
+ bus = pdev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+
+ if (!pdev->is_physfn)
+ return;
+
+ pdn = pci_get_pdn(pdev);
+ list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+ if (pe->parent_dev != pdev)
+ continue;
+
+ pnv_pci_ioda2_release_dma_pe(pdev, pe);
+
+ /* Remove from list */
+ mutex_lock(&phb->ioda.pe_list_mutex);
+ list_del(&pe->list);
+ mutex_unlock(&phb->ioda.pe_list_mutex);
+
+ pnv_ioda_deconfigure_pe(phb, pe);
+
+ pnv_ioda_free_pe(phb, pe->pe_number);
+ }
+}
+
+void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ struct pci_sriov *iov;
+ u16 num_vfs;
+
+ bus = pdev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+ pdn = pci_get_pdn(pdev);
+ iov = pdev->sriov;
+ num_vfs = pdn->num_vfs;
+
+ /* Release VF PEs */
+ pnv_ioda_release_vf_PE(pdev);
+
+ if (phb->type == PNV_PHB_IODA2) {
+ pnv_pci_vf_resource_shift(pdev, -pdn->offset);
+
+ /* Release M64 windows */
+ pnv_pci_vf_release_m64(pdev);
+
+ /* Release PE numbers */
+ bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+ pdn->offset = 0;
+ }
+}
+
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe);
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
+ int pe_num;
+ u16 vf_index;
+ struct pci_dn *pdn;
+
+ bus = pdev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+ pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_physfn)
+ return;
+
+ /* Reserve PE for each VF */
+ for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+ pe_num = pdn->offset + vf_index;
+
+ pe = &phb->ioda.pe_array[pe_num];
+ pe->pe_number = pe_num;
+ pe->phb = phb;
+ pe->flags = PNV_IODA_PE_VF;
+ pe->pbus = NULL;
+ pe->parent_dev = pdev;
+ pe->tce32_seg = -1;
+ pe->mve_number = -1;
+ pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
+ pci_iov_virtfn_devfn(pdev, vf_index);
+
+ pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
+ hose->global_number, pdev->bus->number,
+ PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+ PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
+
+ if (pnv_ioda_configure_pe(phb, pe)) {
+ /* XXX What do we do here ? */
+ if (pe_num)
+ pnv_ioda_free_pe(phb, pe_num);
+ pe->pdev = NULL;
+ continue;
+ }
+
+ pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
+ GFP_KERNEL, hose->node);
+ pe->tce32_table->data = pe;
+
+ /* Put PE to the list */
+ mutex_lock(&phb->ioda.pe_list_mutex);
+ list_add_tail(&pe->list, &phb->ioda.pe_list);
+ mutex_unlock(&phb->ioda.pe_list_mutex);
+
+ pnv_pci_ioda2_setup_dma_pe(phb, pe);
+ }
+}
+
+int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pci_dn *pdn;
+ int ret;
+
+ bus = pdev->bus;
+ hose = pci_bus_to_host(bus);
+ phb = hose->private_data;
+ pdn = pci_get_pdn(pdev);
+
+ if (phb->type == PNV_PHB_IODA2) {
+ /* Calculate available PE for required VFs */
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
+ pdn->offset = bitmap_find_next_zero_area(
+ phb->ioda.pe_alloc, phb->ioda.total_pe,
+ 0, num_vfs, 0);
+ if (pdn->offset >= phb->ioda.total_pe) {
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+ dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
+ pdn->offset = 0;
+ return -EBUSY;
+ }
+ bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+ pdn->num_vfs = num_vfs;
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+
+ /* Assign M64 window accordingly */
+ ret = pnv_pci_vf_assign_m64(pdev);
+ if (ret) {
+ dev_info(&pdev->dev, "Not enough M64 window resources\n");
+ goto m64_failed;
+ }
+
+ /*
+ * When using one M64 BAR to map one IOV BAR, we need to shift
+ * the IOV BAR according to the PE# allocated to the VFs.
+ * Otherwise, the PE# for the VF will conflict with others.
+ */
+ ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
+ if (ret)
+ goto m64_failed;
+ }
+
+ /* Setup VF PEs */
+ pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+ return 0;
+
+m64_failed:
+ bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs);
+ pdn->offset = 0;
+
+ return ret;
+}
+
int pcibios_sriov_disable(struct pci_dev *pdev)
{
+ pnv_pci_sriov_disable(pdev);
+
/* Release PCI data */
remove_dev_pci_data(pdev);
return 0;
@@ -990,6 +1469,8 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
add_dev_pci_data(pdev);
+
+ pnv_pci_sriov_enable(pdev, num_vfs);
return 0;
}
#endif /* CONFIG_PCI_IOV */
@@ -1186,9 +1667,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
int64_t rc;
void *addr;
- /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
-#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
-
/* XXX FIXME: Handle 64-bit only DMA devices */
/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
/* XXX FIXME: Allocate multi-level tables on PHB3 */
@@ -1251,12 +1729,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
TCE_PCI_SWINV_PAIR);
}
iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
- if (pe->pdev)
+ if (pe->flags & PNV_IODA_PE_DEV) {
+ iommu_register_group(tbl, phb->hose->global_number,
+ pe->pe_number);
set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- else
+ } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
+ iommu_register_group(tbl, phb->hose->global_number,
+ pe->pe_number);
pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+ } else if (pe->flags & PNV_IODA_PE_VF) {
+ iommu_register_group(tbl, phb->hose->global_number,
+ pe->pe_number);
+ }
return;
fail:
@@ -1383,12 +1868,19 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
}
iommu_init_table(tbl, phb->hose->node);
- iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
- if (pe->pdev)
+ if (pe->flags & PNV_IODA_PE_DEV) {
+ iommu_register_group(tbl, phb->hose->global_number,
+ pe->pe_number);
set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- else
+ } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
+ iommu_register_group(tbl, phb->hose->global_number,
+ pe->pe_number);
pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+ } else if (pe->flags & PNV_IODA_PE_VF) {
+ iommu_register_group(tbl, phb->hose->global_number,
+ pe->pe_number);
+ }
/* Also create a bypass window */
if (!pnv_iommu_bypass_disabled)
@@ -2068,6 +2560,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->hub_id = hub_id;
phb->opal_id = phb_id;
phb->type = ioda_type;
+ mutex_init(&phb->ioda.pe_alloc_mutex);
/* Detect specific models for error handling */
if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
@@ -2127,6 +2620,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
INIT_LIST_HEAD(&phb->ioda.pe_list);
+ mutex_init(&phb->ioda.pe_list_mutex);
/* Calculate how many 32-bit TCE segments we have */
phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 946aa3d62c3c..02badcef5cea 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -714,6 +714,24 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
+#ifdef CONFIG_PCI_IOV
+ struct pnv_ioda_pe *pe;
+ struct pci_dn *pdn;
+
+ /* Fix the VF pdn PE number */
+ if (pdev->is_virtfn) {
+ pdn = pci_get_pdn(pdev);
+ WARN_ON(pdn->pe_number != IODA_INVALID_PE);
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ if (pe->rid == ((pdev->bus->number << 8) |
+ (pdev->devfn & 0xff))) {
+ pdn->pe_number = pe->pe_number;
+ pe->pdev = pdev;
+ break;
+ }
+ }
+ }
+#endif /* CONFIG_PCI_IOV */
/* If we have no phb structure, try to setup a fallback based on
* the device-tree (RTAS PCI for example)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 84280474e18f..070ee888fc95 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -23,6 +23,7 @@ enum pnv_phb_model {
#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */
#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
+#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
@@ -34,6 +35,9 @@ struct pnv_ioda_pe {
* entire bus (& children). In the former case, pdev
* is populated, in the later case, pbus is.
*/
+#ifdef CONFIG_PCI_IOV
+ struct pci_dev *parent_dev;
+#endif
struct pci_dev *pdev;
struct pci_bus *pbus;
@@ -145,6 +149,8 @@ struct pnv_phb {
/* PE allocation bitmap */
unsigned long *pe_alloc;
+ /* PE allocation mutex */
+ struct mutex pe_alloc_mutex;
/* M32 & IO segment maps */
unsigned int *m32_segmap;
@@ -159,6 +165,7 @@ struct pnv_phb {
* on the sequence of creation
*/
struct list_head pe_list;
+ struct mutex pe_list_mutex;
/* Reverse map of PEs, will have to extend if
* we are to support more than 256 PEs, indexed