summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv/pci-ioda.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-07 12:56:26 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-07 12:56:26 -0800
commit6c3ac1134371b51c9601171af2c32153ccb11100 (patch)
tree4e9d69885b3a3aa22fb0d44941d88d05d914f976 /arch/powerpc/platforms/powernv/pci-ioda.c
parentd72cb8c7d9dbd9ce820c80f3fddb56b296ba96fc (diff)
parent9580b71b5a7863c24a9bd18bcd2ad759b86b1eff (diff)
Merge tag 'powerpc-5.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Notable changes: - Enable THREAD_INFO_IN_TASK to move thread_info off the stack. - A big series from Christoph reworking our DMA code to use more of the generic infrastructure, as he said: "This series switches the powerpc port to use the generic swiotlb and noncoherent dma ops, and to use more generic code for the coherent direct mapping, as well as removing a lot of dead code." - Increase our vmalloc space to 512T with the Hash MMU on modern CPUs, allowing us to support machines with larger amounts of total RAM or distance between nodes. - Two series from Christophe, one to optimise TLB miss handlers on 6xx, and another to optimise the way STRICT_KERNEL_RWX is implemented on some 32-bit CPUs. - Support for KCOV coverage instrumentation which means we can run syzkaller and discover even more bugs in our code. And as always many clean-ups, reworks and minor fixes etc. Thanks to: Alan Modra, Alexey Kardashevskiy, Alistair Popple, Andrea Arcangeli, Andrew Donnellan, Aneesh Kumar K.V, Aravinda Prasad, Balbir Singh, Brajeswar Ghosh, Breno Leitao, Christian Lamparter, Christian Zigotzky, Christophe Leroy, Christoph Hellwig, Corentin Labbe, Daniel Axtens, David Gibson, Diana Craciun, Firoz Khan, Gustavo A. R. Silva, Igor Stoppa, Joe Lawrence, Joel Stanley, Jonathan Neuschäfer, Jordan Niethe, Laurent Dufour, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Masahiro Yamada, Mathieu Malaterre, Matteo Croce, Meelis Roos, Michael W. Bringmann, Nathan Chancellor, Nathan Fontenot, Nicholas Piggin, Nick Desaulniers, Nicolai Stange, Oliver O'Halloran, Paul Mackerras, Peter Xu, PrasannaKumar Muralidharan, Qian Cai, Rashmica Gupta, Reza Arbab, Robert P. J. Day, Russell Currey, Sabyasachi Gupta, Sam Bobroff, Sandipan Das, Sergey Senozhatsky, Souptick Joarder, Stewart Smith, Tyrel Datwyler, Vaibhav Jain, YueHaibing" * tag 'powerpc-5.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (200 commits) powerpc/32: Clear on-stack exception marker upon exception return powerpc: Remove export of save_stack_trace_tsk_reliable() powerpc/mm: fix "section_base" set but not used powerpc/mm: Fix "sz" set but not used warning powerpc/mm: Check secondary hash page table powerpc: remove nargs from __SYSCALL powerpc/64s: Fix unrelocated interrupt trampoline address test powerpc/powernv/ioda: Fix locked_vm counting for memory used by IOMMU tables powerpc/fsl: Fix the flush of branch predictor. powerpc/powernv: Make opal log only readable by root powerpc/xmon: Fix opcode being uninitialized in print_insn_powerpc powerpc/powernv: move OPAL call wrapper tracing and interrupt handling to C powerpc/64s: Fix data interrupts vs d-side MCE reentrancy powerpc/64s: Prepare to handle data interrupts vs d-side MCE reentrancy powerpc/64s: system reset interrupt preserve HSRRs powerpc/64s: Fix HV NMI vs HV interrupt recoverability test powerpc/mm/hash: Handle mmap_min_addr correctly in get_unmapped_area topdown search powerpc/hugetlb: Handle mmap_min_addr correctly in get_unmapped_area callback selftests/powerpc: Remove duplicate header powerpc sstep: Add support for modsd, modud instructions ...
Diffstat (limited to 'arch/powerpc/platforms/powernv/pci-ioda.c')
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c146
1 files changed, 38 insertions, 108 deletions
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 145373f0e5dc..fa6af52b5219 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1748,7 +1748,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+ pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
/*
* Note: iommu_add_device() will fail here as
@@ -1758,31 +1758,6 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
*/
}
-static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
-{
- unsigned short vendor = 0;
- struct pci_dev *pdev;
-
- if (pe->device_count == 1)
- return true;
-
- /* pe->pdev should be set if it's a single device, pe->pbus if not */
- if (!pe->pbus)
- return true;
-
- list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
- if (!vendor) {
- vendor = pdev->vendor;
- continue;
- }
-
- if (pdev->vendor != vendor)
- return false;
- }
-
- return true;
-}
-
/*
* Reconfigure TVE#0 to be usable as 64-bit DMA space.
*
@@ -1852,88 +1827,45 @@ err:
return -EIO;
}
-static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
+ u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
- uint64_t top;
- bool bypass = false;
- s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
- top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
- bypass = (dma_mask >= top);
+ u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+ if (dma_mask >= top)
+ return true;
}
- if (bypass) {
- dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
- } else {
- /*
- * If the device can't set the TCE bypass bit but still wants
- * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
- * bypass the 32-bit region and be usable for 64-bit DMAs.
- * The device needs to be able to address all of this space.
- */
- if (dma_mask >> 32 &&
- dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
- pnv_pci_ioda_pe_single_vendor(pe) &&
- phb->model == PNV_PHB_MODEL_PHB3) {
- /* Configure the bypass mode */
- rc = pnv_pci_ioda_dma_64bit_bypass(pe);
- if (rc)
- return rc;
- /* 4GB offset bypasses 32-bit space */
- set_dma_offset(&pdev->dev, (1ULL << 32));
- set_dma_ops(&pdev->dev, &dma_nommu_ops);
- } else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
- /*
- * Fail the request if a DMA mask between 32 and 64 bits
- * was requested but couldn't be fulfilled. Ideally we
- * would do this for 64-bits but historically we have
- * always fallen back to 32-bits.
- */
- return -ENOMEM;
- } else {
- dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(&pdev->dev, &dma_iommu_ops);
- }
+ /*
+ * If the device can't set the TCE bypass bit but still wants
+ * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+ * bypass the 32-bit region and be usable for 64-bit DMAs.
+ * The device needs to be able to address all of this space.
+ */
+ if (dma_mask >> 32 &&
+ dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
+ /* pe->pdev should be set if it's a single device, pe->pbus if not */
+ (pe->device_count == 1 || !pe->pbus) &&
+ phb->model == PNV_PHB_MODEL_PHB3) {
+ /* Configure the bypass mode */
+ s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
+ if (rc)
+ return rc;
+ /* 4GB offset bypasses 32-bit space */
+ pdev->dev.archdata.dma_offset = (1ULL << 32);
+ return true;
}
- *pdev->dev.dma_mask = dma_mask;
- /* Update peer npu devices */
- pnv_npu_try_dma_set_bypass(pdev, bypass);
-
- return 0;
-}
-
-static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
- struct pci_dn *pdn = pci_get_pdn(pdev);
- struct pnv_ioda_pe *pe;
- u64 end, mask;
-
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return 0;
-
- pe = &phb->ioda.pe_array[pdn->pe_number];
- if (!pe->tce_bypass_enabled)
- return __dma_get_required_mask(&pdev->dev);
-
-
- end = pe->tce_bypass_base + memblock_end_of_DRAM();
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
+ return false;
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
@@ -1942,7 +1874,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
- set_dma_offset(&dev->dev, pe->tce_bypass_base);
+ dev->dev.archdata.dma_offset = pe->tce_bypass_base;
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
pnv_ioda_setup_bus_dma(pe, dev->subordinate);
@@ -2594,8 +2526,13 @@ static long pnv_pci_ioda2_create_table_userspace(
int num, __u32 page_shift, __u64 window_size, __u32 levels,
struct iommu_table **ptbl)
{
- return pnv_pci_ioda2_create_table(table_group,
+ long ret = pnv_pci_ioda2_create_table(table_group,
num, page_shift, window_size, levels, true, ptbl);
+
+ if (!ret)
+ (*ptbl)->it_allocated_size = pnv_pci_ioda2_get_table_size(
+ page_shift, window_size, levels);
+ return ret;
}
static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
@@ -3661,6 +3598,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
+ .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
.enable_device_hook = pnv_pci_enable_device_hook,
@@ -3668,19 +3606,9 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.window_alignment = pnv_pci_window_alignment,
.setup_bridge = pnv_pci_setup_bridge,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_pci_ioda_dma_set_mask,
- .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
.shutdown = pnv_pci_ioda_shutdown,
};
-static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
-{
- dev_err_once(&npdev->dev,
- "%s operation unsupported for NVLink devices\n",
- __func__);
- return -EPERM;
-}
-
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.setup_msi_irqs = pnv_setup_msi_irqs,
@@ -3688,7 +3616,6 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .dma_set_mask = pnv_npu_dma_set_mask,
.shutdown = pnv_pci_ioda_shutdown,
.disable_device = pnv_npu_disable_device,
};
@@ -3946,9 +3873,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
* shutdown PCI devices correctly. We already got IODA table
* cleaned out. So we have to issue PHB reset to stop all PCI
* transactions from previous kernel. The ppc_pci_reset_phbs
- * kernel parameter will force this reset too.
+ * kernel parameter will force this reset too. Additionally,
+ * if the IODA reset above failed then use a bigger hammer.
+ * This can happen if we get a PHB fatal error in very early
+ * boot.
*/
- if (is_kdump_kernel() || pci_reset_phbs) {
+ if (is_kdump_kernel() || pci_reset_phbs || rc) {
pr_info(" Issue PHB reset ...\n");
pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);