summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Makefile1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c101
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c7
-rw-r--r--arch/powerpc/platforms/powernv/ocxl.c515
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c77
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S3
-rw-r--r--arch/powerpc/platforms/powernv/opal.c28
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c88
-rw-r--r--arch/powerpc/platforms/powernv/pci.c4
-rw-r--r--arch/powerpc/platforms/powernv/pci.h8
-rw-r--r--arch/powerpc/platforms/powernv/smp.c28
14 files changed, 743 insertions, 131 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 3732118a0482..6c9d5199a7e2 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o
obj-$(CONFIG_PPC_FTW) += nx-ftw.o
+obj-$(CONFIG_OCXL_BASE) += ocxl.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4650fb294e7a..33c86c1a1720 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -43,6 +43,22 @@
static int eeh_event_irq = -EINVAL;
+void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!pdev->is_virtfn)
+ return;
+
+ /*
+ * The following operations will fail if VF's sysfs files
+ * aren't created or its resources aren't finalized.
+ */
+ eeh_add_device_early(pdn);
+ eeh_add_device_late(pdev);
+ eeh_sysfs_add_device(pdev);
+}
+
static int pnv_eeh_init(void)
{
struct pci_controller *hose;
@@ -86,6 +102,7 @@ static int pnv_eeh_init(void)
}
eeh_set_pe_aux_size(max_diag_size);
+ ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
return 0;
}
@@ -1638,70 +1655,11 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
return ret;
}
-static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
-{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
- u32 devctl, cmd, cap2, aer_capctl;
- int old_mps;
-
- if (edev->pcie_cap) {
- /* Restore MPS */
- old_mps = (ffs(pdn->mps) - 8) << 5;
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
- devctl |= old_mps;
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
-
- /* Disable Completion Timeout */
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
- 4, &cap2);
- if (cap2 & 0x10) {
- eeh_ops->read_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, &cap2);
- cap2 |= 0x10;
- eeh_ops->write_config(pdn,
- edev->pcie_cap + PCI_EXP_DEVCTL2,
- 4, cap2);
- }
- }
-
- /* Enable SERR and parity checking */
- eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
- cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
- eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
- /* Enable report various errors */
- if (edev->pcie_cap) {
- eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, &devctl);
- devctl &= ~PCI_EXP_DEVCTL_CERE;
- devctl |= (PCI_EXP_DEVCTL_NFERE |
- PCI_EXP_DEVCTL_FERE |
- PCI_EXP_DEVCTL_URRE);
- eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
- 2, devctl);
- }
-
- /* Enable ECRC generation and check */
- if (edev->pcie_cap && edev->aer_cap) {
- eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, &aer_capctl);
- aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
- eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
- 4, aer_capctl);
- }
-
- return 0;
-}
-
static int pnv_eeh_restore_config(struct pci_dn *pdn)
{
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
struct pnv_phb *phb;
- s64 ret;
+ s64 ret = 0;
int config_addr = (pdn->busno << 8) | (pdn->devfn);
if (!edev)
@@ -1715,7 +1673,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
* to be exported by firmware in extendible way.
*/
if (edev->physfn) {
- ret = pnv_eeh_restore_vf_config(pdn);
+ ret = eeh_restore_vf_config(pdn);
} else {
phb = pdn->phb->private_data;
ret = opal_pci_reinit(phb->opal_id,
@@ -1728,7 +1686,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
return -EIO;
}
- return 0;
+ return ret;
}
static struct eeh_ops pnv_eeh_ops = {
@@ -1746,25 +1704,10 @@ static struct eeh_ops pnv_eeh_ops = {
.read_config = pnv_eeh_read_config,
.write_config = pnv_eeh_write_config,
.next_error = pnv_eeh_next_error,
- .restore_config = pnv_eeh_restore_config
+ .restore_config = pnv_eeh_restore_config,
+ .notify_resume = NULL
};
-void pcibios_bus_add_device(struct pci_dev *pdev)
-{
- struct pci_dn *pdn = pci_get_pdn(pdev);
-
- if (!pdev->is_virtfn)
- return;
-
- /*
- * The following operations will fail if VF's sysfs files
- * aren't created or its resources aren't finalized.
- */
- eeh_add_device_early(pdn);
- eeh_add_device_late(pdev);
- eeh_sysfs_add_device(pdev);
-}
-
#ifdef CONFIG_PCI_IOV
static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev)
{
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index f6cbc1a71472..0a253b64ac5f 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -39,7 +39,10 @@
*/
static struct pci_dev *get_pci_dev(struct device_node *dn)
{
- return PCI_DN(dn)->pcidev;
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
+ pdn->busno, pdn->devfn);
}
/* Given a NPU device get the associated PCI device. */
@@ -277,7 +280,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
int64_t rc = 0;
phys_addr_t top = memblock_end_of_DRAM();
- if (phb->type != PNV_PHB_NPU || !npe->pdev)
+ if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
return -EINVAL;
rc = pnv_npu_unset_window(npe, 0);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
new file mode 100644
index 000000000000..fa9b53af3c7b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <asm/pnv-ocxl.h>
+#include <asm/opal.h>
+#include <asm/xive.h>
+#include <misc/ocxl-config.h>
+#include "pci.h"
+
+#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full
+#define PNV_OCXL_ACTAG_MAX 64
+/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
+#define PNV_OCXL_PASID_BITS 15
+#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)
+
+#define AFU_PRESENT (1 << 31)
+#define AFU_INDEX_MASK 0x3F000000
+#define AFU_INDEX_SHIFT 24
+#define ACTAG_MASK 0xFFF
+
+
+struct actag_range {
+ u16 start;
+ u16 count;
+};
+
+struct npu_link {
+ struct list_head list;
+ int domain;
+ int bus;
+ int dev;
+ u16 fn_desired_actags[8];
+ struct actag_range fn_actags[8];
+ bool assignment_done;
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock);
+
+
+/*
+ * opencapi actags handling:
+ *
+ * When sending commands, the opencapi device references the memory
+ * context it's targeting with an 'actag', which is really an alias
+ * for a (BDF, pasid) combination. When it receives a command, the NPU
+ * must do a lookup of the actag to identify the memory context. The
+ * hardware supports a finite number of actags per link (64 for
+ * POWER9).
+ *
+ * The device can carry multiple functions, and each function can have
+ * multiple AFUs. Each AFU advertises in its config space the number
+ * of desired actags. The host must configure in the config space of
+ * the AFU how many actags the AFU is really allowed to use (which can
+ * be less than what the AFU desires).
+ *
+ * When a PCI function is probed by the driver, it has no visibility
+ * about the other PCI functions and how many actags they'd like,
+ * which makes it impossible to distribute actags fairly among AFUs.
+ *
+ * Unfortunately, the only way to know how many actags a function
+ * desires is by looking at the data for each AFU in the config space
+ * and add them up. Similarly, the only way to know how many actags
+ * all the functions of the physical device desire is by adding the
+ * previously computed function counts. Then we can match that against
+ * what the hardware supports.
+ *
+ * To get a comprehensive view, we use a 'pci fixup': at the end of
+ * PCI enumeration, each function counts how many actags its AFUs
+ * desire and we save it in a 'npu_link' structure, shared between all
+ * the PCI functions of a same device. Therefore, when the first
+ * function is probed by the driver, we can get an idea of the total
+ * count of desired actags for the device, and assign the actags to
+ * the AFUs, by pro-rating if needed.
+ */
+
+static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
+{
+ int vsec = pos;
+ u16 vendor, id;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+ if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+ return vsec;
+ }
+ return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+ int vsec = 0;
+ u8 idx;
+
+ while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
+ vsec))) {
+ pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+ &idx);
+ if (idx == afu_idx)
+ return vsec;
+ }
+ return 0;
+}
+
+static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
+{
+ int pos;
+ u32 val;
+
+ pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
+ if (!pos)
+ return -ESRCH;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+ if (val & AFU_PRESENT)
+ *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
+ else
+ *afu_idx = -1;
+ return 0;
+}
+
+static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
+{
+ int pos;
+ u16 actag_sup;
+
+ pos = find_dvsec_afu_ctrl(dev, afu_idx);
+ if (!pos)
+ return -ESRCH;
+
+ pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
+ &actag_sup);
+ *actag = actag_sup & ACTAG_MASK;
+ return 0;
+}
+
+static struct npu_link *find_link(struct pci_dev *dev)
+{
+ struct npu_link *link;
+
+ list_for_each_entry(link, &links_list, list) {
+ /* The functions of a device all share the same link */
+ if (link->domain == pci_domain_nr(dev->bus) &&
+ link->bus == dev->bus->number &&
+ link->dev == PCI_SLOT(dev->devfn)) {
+ return link;
+ }
+ }
+
+ /* link doesn't exist yet. Allocate one */
+ link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
+ if (!link)
+ return NULL;
+ link->domain = pci_domain_nr(dev->bus);
+ link->bus = dev->bus->number;
+ link->dev = PCI_SLOT(dev->devfn);
+ list_add(&link->list, &links_list);
+ return link;
+}
+
+static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct npu_link *link;
+ int rc, afu_idx = -1, i, actag;
+
+ if (!machine_is(powernv))
+ return;
+
+ if (phb->type != PNV_PHB_NPU_OCAPI)
+ return;
+
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_warn(&dev->dev, "couldn't update actag information\n");
+ mutex_unlock(&links_list_lock);
+ return;
+ }
+
+ /*
+ * Check how many actags are desired for the AFUs under that
+ * function and add it to the count for the link
+ */
+ rc = get_max_afu_index(dev, &afu_idx);
+ if (rc) {
+ /* Most likely an invalid config space */
+ dev_dbg(&dev->dev, "couldn't find AFU information\n");
+ afu_idx = -1;
+ }
+
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
+ for (i = 0; i <= afu_idx; i++) {
+ /*
+ * AFU index 'holes' are allowed. So don't fail if we
+ * can't read the actag info for an index
+ */
+ rc = get_actag_count(dev, i, &actag);
+ if (rc)
+ continue;
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
+ }
+ dev_dbg(&dev->dev, "total actags for function: %d\n",
+ link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
+
+ mutex_unlock(&links_list_lock);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
+
+static u16 assign_fn_actags(u16 desired, u16 total)
+{
+ u16 count;
+
+ if (total <= PNV_OCXL_ACTAG_MAX)
+ count = desired;
+ else
+ count = PNV_OCXL_ACTAG_MAX * desired / total;
+
+ return count;
+}
+
+static void assign_actags(struct npu_link *link)
+{
+ u16 actag_count, range_start = 0, total_desired = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ total_desired += link->fn_desired_actags[i];
+
+ for (i = 0; i < 8; i++) {
+ if (link->fn_desired_actags[i]) {
+ actag_count = assign_fn_actags(
+ link->fn_desired_actags[i],
+ total_desired);
+ link->fn_actags[i].start = range_start;
+ link->fn_actags[i].count = actag_count;
+ range_start += actag_count;
+ WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
+ }
+ pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
+ link->domain, link->bus, link->dev, i,
+ link->fn_actags[i].start, link->fn_actags[i].count,
+ link->fn_desired_actags[i]);
+ }
+ link->assignment_done = true;
+}
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported)
+{
+ struct npu_link *link;
+
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_err(&dev->dev, "actag information not found\n");
+ mutex_unlock(&links_list_lock);
+ return -ENODEV;
+ }
+ /*
+ * On p9, we only have 64 actags per link, so they must be
+ * shared by all the functions of the same adapter. We counted
+ * the desired actag counts during PCI enumeration, so that we
+ * can allocate a pro-rated number of actags to each function.
+ */
+ if (!link->assignment_done)
+ assign_actags(link);
+
+ *base = link->fn_actags[PCI_FUNC(dev->devfn)].start;
+ *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;
+ *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
+
+ mutex_unlock(&links_list_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
+
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
+{
+ struct npu_link *link;
+ int i, rc = -EINVAL;
+
+ /*
+ * The number of PASIDs (process address space ID) which can
+ * be used by a function depends on how many functions exist
+ * on the device. The NPU needs to be configured to know how
+ * many bits are available to PASIDs and how many are to be
+ * used by the function BDF indentifier.
+ *
+ * We only support one AFU-carrying function for now.
+ */
+ mutex_lock(&links_list_lock);
+
+ link = find_link(dev);
+ if (!link) {
+ dev_err(&dev->dev, "actag information not found\n");
+ mutex_unlock(&links_list_lock);
+ return -ENODEV;
+ }
+
+ for (i = 0; i < 8; i++)
+ if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
+ *count = PNV_OCXL_PASID_MAX;
+ rc = 0;
+ break;
+ }
+
+ mutex_unlock(&links_list_lock);
+ dev_dbg(&dev->dev, "%d PASIDs available for function\n",
+ rc ? 0 : *count);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
+
+static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
+{
+ int shift, idx;
+
+ WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
+ idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
+ shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
+ buf[idx] |= rate << shift;
+}
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size)
+{
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL;
+ /*
+ * The TL capabilities are a characteristic of the NPU, so
+ * we go with hard-coded values.
+ *
+ * The receiving rate of each template is encoded on 4 bits.
+ *
+ * On P9:
+ * - templates 0 -> 3 are supported
+ * - templates 0, 1 and 3 have a 0 receiving rate
+ * - template 2 has receiving rate of 1 (extra cycle)
+ */
+ memset(rate_buf, 0, rate_buf_size);
+ set_templ_rate(2, 1, rate_buf);
+ *cap = PNV_OCXL_TL_P9_RECV_CAP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
+
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+ uint64_t rate_buf_phys, int rate_buf_size)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int rc;
+
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL;
+
+ rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
+ rate_buf_phys, rate_buf_size);
+ if (rc) {
+ dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
+{
+ int rc;
+
+ rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Can't get translation interrupt for device\n");
+ return rc;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
+
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle)
+{
+ iounmap(dsisr);
+ iounmap(dar);
+ iounmap(tfc);
+ iounmap(pe_handle);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
+
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle)
+{
+ u64 reg;
+ int i, j, rc = 0;
+ void __iomem *regs[4];
+
+ /*
+ * opal stores the mmio addresses of the DSISR, DAR, TFC and
+ * PE_HANDLE registers in a device tree property, in that
+ * order
+ */
+ for (i = 0; i < 4; i++) {
+ rc = of_property_read_u64_index(dev->dev.of_node,
+ "ibm,opal-xsl-mmio", i, &reg);
+ if (rc)
+ break;
+ regs[i] = ioremap(reg, 8);
+ if (!regs[i]) {
+ rc = -EINVAL;
+ break;
+ }
+ }
+ if (rc) {
+ dev_err(&dev->dev, "Can't map translation mmio registers\n");
+ for (j = i - 1; j >= 0; j--)
+ iounmap(regs[j]);
+ } else {
+ *dsisr = regs[0];
+ *dar = regs[1];
+ *tfc = regs[2];
+ *pe_handle = regs[3];
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
+
+struct spa_data {
+ u64 phb_opal_id;
+ u32 bdfn;
+};
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+ void **platform_data)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct spa_data *data;
+ u32 bdfn;
+ int rc;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ bdfn = (dev->bus->number << 8) | dev->devfn;
+ rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
+ PE_mask);
+ if (rc) {
+ dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
+ kfree(data);
+ return rc;
+ }
+ data->phb_opal_id = phb->opal_id;
+ data->bdfn = bdfn;
+ *platform_data = (void *) data;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
+
+void pnv_ocxl_spa_release(void *platform_data)
+{
+ struct spa_data *data = (struct spa_data *) platform_data;
+ int rc;
+
+ rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
+ WARN_ON(rc);
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
+
+int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
+{
+ struct spa_data *data = (struct spa_data *) platform_data;
+ int rc;
+
+ rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe);
+
+int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
+{
+ __be64 flags, trigger_page;
+ s64 rc;
+ u32 hwirq;
+
+ hwirq = xive_native_alloc_irq();
+ if (!hwirq)
+ return -ENOENT;
+
+ rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
+ NULL);
+ if (rc || !trigger_page) {
+ xive_native_free_irq(hwirq);
+ return -ENOENT;
+ }
+ *irq = hwirq;
+ *trigger_addr = be64_to_cpu(trigger_page);
+ return 0;
+
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
+
+void pnv_ocxl_free_xive_irq(u32 irq)
+{
+ xive_native_free_irq(irq);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 4c827826c05e..0dc8fa4e0af2 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -103,9 +103,9 @@ static ssize_t dump_ack_store(struct dump_obj *dump_obj,
* due to the dynamic size of the dump
*/
static struct dump_attribute id_attribute =
- __ATTR(id, S_IRUGO, dump_id_show, NULL);
+ __ATTR(id, 0444, dump_id_show, NULL);
static struct dump_attribute type_attribute =
- __ATTR(type, S_IRUGO, dump_type_show, NULL);
+ __ATTR(type, 0444, dump_type_show, NULL);
static struct dump_attribute ack_attribute =
__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index ecd6d9177d13..ba6e437abb4b 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -83,9 +83,9 @@ static ssize_t elog_ack_store(struct elog_obj *elog_obj,
}
static struct elog_attribute id_attribute =
- __ATTR(id, S_IRUGO, elog_id_show, NULL);
+ __ATTR(id, 0444, elog_id_show, NULL);
static struct elog_attribute type_attribute =
- __ATTR(type, S_IRUGO, elog_type_show, NULL);
+ __ATTR(type, 0444, elog_type_show, NULL);
static struct elog_attribute ack_attribute =
__ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 465ea105b771..dd4c9b8b8a81 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,78 @@
#include <asm/io.h>
#include <asm/imc-pmu.h>
#include <asm/cputhreads.h>
+#include <asm/debugfs.h>
+
+static struct dentry *imc_debugfs_parent;
+
+/* Helpers to export imc command and mode via debugfs */
+static int imc_mem_get(void *data, u64 *val)
+{
+ *val = cpu_to_be64(*(u64 *)data);
+ return 0;
+}
+
+static int imc_mem_set(void *data, u64 val)
+{
+ *(u64 *)data = cpu_to_be64(val);
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value)
+{
+ return debugfs_create_file_unsafe(name, mode, parent,
+ value, &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create a debugfs interface
+ * for imc_cmd and imc_mode
+ * for each node in the system.
+ * imc_mode and imc_cmd can be changed by echo into
+ * this interface.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+ struct imc_pmu *pmu_ptr)
+{
+ static u64 loc, *imc_mode_addr, *imc_cmd_addr;
+ int chip = 0, nid;
+ char mode[16], cmd[16];
+ u32 cb_offset;
+
+ imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+
+ /*
+ * Return here, either because 'imc' directory already exists,
+ * Or failed to create a new one.
+ */
+ if (!imc_debugfs_parent)
+ return;
+
+ if (of_property_read_u32(node, "cb_offset", &cb_offset))
+ cb_offset = IMC_CNTL_BLK_OFFSET;
+
+ for_each_node(nid) {
+ loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+ imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+ sprintf(mode, "imc_mode_%d", nid);
+ if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+ imc_mode_addr))
+ goto err;
+
+ imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+ sprintf(cmd, "imc_cmd_%d", nid);
+ if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+ imc_cmd_addr))
+ goto err;
+ chip++;
+ }
+ return;
+
+err:
+ debugfs_remove_recursive(imc_debugfs_parent);
+}
/*
* imc_get_mem_addr_nest: Function to get nest counter memory region
@@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
}
pmu_ptr->imc_counter_mmaped = true;
+ export_imc_mode_and_cmd(node, pmu_ptr);
kfree(base_addr_arr);
kfree(chipid_arr);
return 0;
@@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
}
}
+ /* If none of the nest units are registered, remove debugfs interface */
+ if (pmu_count == 0)
+ debugfs_remove_recursive(imc_debugfs_parent);
+
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
index 23fb6647dced..6fd4092798d5 100644
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -260,13 +260,13 @@ void __init opal_sys_param_init(void)
/* If the parameter is read-only or read-write */
switch (perm[i] & 3) {
case OPAL_SYSPARAM_READ:
- attr[i].kobj_attr.attr.mode = S_IRUGO;
+ attr[i].kobj_attr.attr.mode = 0444;
break;
case OPAL_SYSPARAM_WRITE:
- attr[i].kobj_attr.attr.mode = S_IWUSR;
+ attr[i].kobj_attr.attr.mode = 0200;
break;
case OPAL_SYSPARAM_RW:
- attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
+ attr[i].kobj_attr.attr.mode = 0644;
break;
default:
break;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..1b2936ba6040 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,6 @@ OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 041ddbd1fc57..c15182765ff5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
powerpc_firmware_features |= FW_FEATURE_OPAL;
- pr_info("OPAL detected !\n");
+ pr_debug("OPAL detected !\n");
} else {
panic("OPAL != V3 detected, no longer supported.\n");
}
@@ -239,8 +239,8 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
struct notifier_block *nb)
{
if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
- pr_warning("%s: Invalid arguments, msg_type:%d\n",
- __func__, msg_type);
+ pr_warn("%s: Invalid arguments, msg_type:%d\n",
+ __func__, msg_type);
return -EINVAL;
}
@@ -281,8 +281,8 @@ static void opal_handle_message(void)
/* check for errors. */
if (ret) {
- pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
- __func__, ret);
+ pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
+ __func__, ret);
return;
}
@@ -461,24 +461,14 @@ static int opal_recover_mce(struct pt_regs *regs,
void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
{
- /*
- * This is mostly taken from kernel/panic.c, but tries to do
- * relatively minimal work. Don't use delay functions (TB may
- * be broken), don't crash dump (need to set a firmware log),
- * don't run notifiers. We do want to get some information to
- * Linux console.
- */
- console_verbose();
- bust_spinlocks(1);
+ panic_flush_kmsg_start();
+
pr_emerg("Hardware platform error: %s\n", msg);
if (regs)
show_regs(regs);
smp_send_stop();
- printk_safe_flush_on_panic();
- kmsg_dump(KMSG_DUMP_PANIC);
- bust_spinlocks(0);
- debug_locks_off();
- console_flush_on_panic();
+
+ panic_flush_kmsg_end();
/*
* Don't bother to shut things down because this will
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9582aeb1fe4c..496e47696ed0 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -54,7 +54,8 @@
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
#define POWERNV_IOMMU_MAX_LEVELS 5
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
+ "NPU_OCAPI" };
static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -89,6 +90,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
}
static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pci_reset_phbs __read_mostly;
static int __init iommu_setup(char *str)
{
@@ -110,6 +112,14 @@ static int __init iommu_setup(char *str)
}
early_param("iommu", iommu_setup);
+static int __init pci_reset_phbs_setup(char *str)
+{
+ pci_reset_phbs = true;
+ return 0;
+}
+
+early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
+
static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
/*
@@ -924,7 +934,7 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
* Configure PELTV. NPUs don't have a PELTV table so skip
* configuration on them.
*/
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
@@ -1059,8 +1069,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
pe = pnv_ioda_alloc_pe(phb);
if (!pe) {
- pr_warning("%s: Not enough PE# available, disabling device\n",
- pci_name(dev));
+ pr_warn("%s: Not enough PE# available, disabling device\n",
+ pci_name(dev));
return NULL;
}
@@ -1072,7 +1082,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
* At some point we want to remove the PDN completely anyways
*/
pci_dev_get(dev);
- pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
@@ -1119,7 +1128,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
continue;
pe->device_count++;
- pdn->pcidev = dev;
pdn->pe_number = pe->pe_number;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_same_PE(dev->subordinate, pe);
@@ -1164,7 +1172,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe = pnv_ioda_alloc_pe(phb);
if (!pe) {
- pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+ pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
__func__, pci_domain_nr(bus), bus->number);
return NULL;
}
@@ -1234,7 +1242,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
pci_dev_get(npu_pdev);
npu_pdn = pci_get_pdn(npu_pdev);
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
- npu_pdn->pcidev = npu_pdev;
npu_pdn->pe_number = pe_num;
phb->ioda.pe_rmap[rid] = pe->pe_number;
@@ -1272,16 +1279,23 @@ static void pnv_pci_ioda_setup_PEs(void)
{
struct pci_controller *hose, *tmp;
struct pnv_phb *phb;
+ struct pci_bus *bus;
+ struct pci_dev *pdev;
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- if (phb->type == PNV_PHB_NPU) {
+ if (phb->type == PNV_PHB_NPU_NVLINK) {
/* PE#0 is needed for error reporting */
pnv_ioda_reserve_pe(phb, 0);
pnv_ioda_setup_npu_PEs(hose->bus);
if (phb->model == PNV_PHB_MODEL_NPU2)
pnv_npu2_init(phb);
}
+ if (phb->type == PNV_PHB_NPU_OCAPI) {
+ bus = hose->bus;
+ list_for_each_entry(pdev, &bus->devices, bus_list)
+ pnv_ioda_setup_dev_PE(pdev);
+ }
}
}
@@ -1692,7 +1706,7 @@ m64_failed:
return ret;
}
-int pcibios_sriov_disable(struct pci_dev *pdev)
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
{
pnv_pci_sriov_disable(pdev);
@@ -1701,7 +1715,7 @@ int pcibios_sriov_disable(struct pci_dev *pdev)
return 0;
}
-int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
/* Allocate PCI data */
add_dev_pci_data(pdev);
@@ -2572,7 +2586,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
unsigned long direct_table_size;
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
- (window_size > memory_hotplug_max()) ||
!is_power_of_2(window_size))
return 0;
@@ -2640,7 +2653,7 @@ static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
hose = pci_bus_to_host(pdev->bus);
phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK)
return 0;
*ptmppe = &phb->ioda.pe_array[pdn->pe_number];
@@ -2724,7 +2737,7 @@ static void pnv_pci_ioda_setup_iommu_api(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU)
+ if (phb->type != PNV_PHB_NPU_NVLINK)
continue;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
@@ -3293,7 +3306,7 @@ static void pnv_pci_ioda_create_dbgfs(void)
sprintf(name, "PCI%04x", hose->global_number);
phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
if (!phb->dbgfs) {
- pr_warning("%s: Error on creating debugfs on PHB#%x\n",
+ pr_warn("%s: Error on creating debugfs on PHB#%x\n",
__func__, hose->global_number);
continue;
}
@@ -3774,6 +3787,13 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.shutdown = pnv_pci_ioda_shutdown,
};
+static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .shutdown = pnv_pci_ioda_shutdown,
+};
+
#ifdef CONFIG_CXL_BASE
const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
@@ -4007,9 +4027,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- if (phb->type == PNV_PHB_NPU) {
+ switch (phb->type) {
+ case PNV_PHB_NPU_NVLINK:
hose->controller_ops = pnv_npu_ioda_controller_ops;
- } else {
+ break;
+ case PNV_PHB_NPU_OCAPI:
+ hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
+ break;
+ default:
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
hose->controller_ops = pnv_pci_ioda_controller_ops;
}
@@ -4019,6 +4044,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+ ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
+ ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
#endif
pci_add_flags(PCI_REASSIGN_ALL_RSRC);
@@ -4026,15 +4053,16 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Reset IODA tables to a clean state */
rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
if (rc)
- pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
+ pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc);
/*
* If we're running in kdump kernel, the previous kernel never
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
- * transactions from previous kernel.
+ * transactions from previous kernel. The ppc_pci_reset_phbs
+ * kernel parameter will force this reset too.
*/
- if (is_kdump_kernel()) {
+ if (is_kdump_kernel() || pci_reset_phbs) {
pr_info(" Issue PHB reset ...\n");
pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
@@ -4052,8 +4080,26 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
void __init pnv_pci_init_npu_phb(struct device_node *np)
{
- pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
+}
+
+void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
+}
+
+static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ if (!machine_is(powernv))
+ return;
+
+ if (phb->type == PNV_PHB_NPU_OCAPI)
+ dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 5422f4a6317c..69d102cbf48f 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1142,6 +1142,10 @@ void __init pnv_pci_init(void)
for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
pnv_pci_init_npu_phb(np);
+ /* Look for NPU2 OpenCAPI PHBs */
+ for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
+ pnv_pci_init_npu2_opencapi_phb(np);
+
/* Configure IOMMU DMA hooks */
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b772d7473896..eada4b6068cb 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -12,9 +12,10 @@ struct pci_dn;
#define NV_NMMU_ATSD_REGS 8
enum pnv_phb_type {
- PNV_PHB_IODA1 = 0,
- PNV_PHB_IODA2 = 1,
- PNV_PHB_NPU = 2,
+ PNV_PHB_IODA1 = 0,
+ PNV_PHB_IODA2 = 1,
+ PNV_PHB_NPU_NVLINK = 2,
+ PNV_PHB_NPU_OCAPI = 3,
};
/* Precise PHB model for error management */
@@ -227,6 +228,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
+extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ba030669eca1..9664c8461f03 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -37,6 +37,8 @@
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
#include "powernv.h"
@@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void)
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ } else if ((srr1 & wmask) == SRR1_WAKERESET) {
+ irq_set_pending_from_srr1(srr1);
+ /* Does not return */
}
+
smp_mb();
+ /*
+ * For kdump kernels, we process the ipi and jump to
+ * crash_ipi_callback
+ */
+ if (kdump_in_progress()) {
+ /*
+ * If we got to this point, we've not used
+ * NMI's, otherwise we would have gone
+ * via the SRR1_WAKERESET path. We are
+ * using regular IPI's for waking up offline
+ * threads.
+ */
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ crash_ipi_callback(&regs);
+ /* Does not return */
+ }
+
if (cpu_core_split_required())
continue;
@@ -371,5 +396,8 @@ void __init pnv_smp_init(void)
#ifdef CONFIG_HOTPLUG_CPU
ppc_md.cpu_die = pnv_smp_cpu_kill_self;
+#ifdef CONFIG_KEXEC_CORE
+ crash_wake_offline = 1;
+#endif
#endif
}