diff options
Diffstat (limited to 'arch/powerpc/sysdev/dart_iommu.c')
| -rw-r--r-- | arch/powerpc/sysdev/dart_iommu.c | 300 |
1 files changed, 130 insertions, 170 deletions
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index bd968a43a48b..c0d10c149661 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * arch/powerpc/sysdev/dart_iommu.c * @@ -10,21 +11,6 @@ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation * * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu. - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/init.h> @@ -38,8 +24,8 @@ #include <linux/suspend.h> #include <linux/memblock.h> #include <linux/gfp.h> +#include <linux/of_address.h> #include <asm/io.h> -#include <asm/prom.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> #include <asm/machdep.h> @@ -48,16 +34,10 @@ #include "dart.h" -/* Physical base address and size of the DART table */ -unsigned long dart_tablebase; /* exported to htab_initialize */ +/* DART table address and size */ +static u32 *dart_tablebase; static unsigned long dart_tablesize; -/* Virtual base address of the DART table */ -static u32 *dart_vbase; -#ifdef CONFIG_PM -static u32 *dart_copy; -#endif - /* Mapped base address for the dart */ static unsigned int __iomem *dart; @@ -151,6 +131,34 @@ wait_more: spin_unlock_irqrestore(&invalidate_lock, flags); } +static void dart_cache_sync(unsigned int *base, unsigned int count) +{ + /* + * We add 1 to the number of entries to flush, following a + * comment in Darwin indicating that the memory controller + * can prefetch unmapped memory under some circumstances. + */ + unsigned long start = (unsigned long)base; + unsigned long end = start + (count + 1) * sizeof(unsigned int); + unsigned int tmp; + + /* Perform a standard cache flush */ + flush_dcache_range(start, end); + + /* + * Perform the sequence described in the CPC925 manual to + * ensure all the data gets to a point the cache incoherent + * DART hardware will see. + */ + asm volatile(" sync;" + " isync;" + " dcbf 0,%1;" + " sync;" + " isync;" + " lwz %0,0(%1);" + " isync" : "=r" (tmp) : "r" (end) : "memory"); +} + static void dart_flush(struct iommu_table *tbl) { mb(); @@ -163,15 +171,15 @@ static void dart_flush(struct iommu_table *tbl) static int dart_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + unsigned long attrs) { - unsigned int *dp; + unsigned int *dp, *orig_dp; unsigned int rpn; long l; DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); - dp = ((unsigned int*)tbl->it_base) + index; + orig_dp = dp = ((unsigned int*)tbl->it_base) + index; /* On U3, all memory is contiguous, so we can move this * out of the loop. @@ -184,11 +192,7 @@ static int dart_build(struct iommu_table *tbl, long index, uaddr += DART_PAGE_SIZE; } - - /* make sure all updates have reached memory */ - mb(); - in_be32((unsigned __iomem *)dp); - mb(); + dart_cache_sync(orig_dp, npages); if (dart_is_u4) { rpn = index; @@ -203,7 +207,8 @@ static int dart_build(struct iommu_table *tbl, long index, static void dart_free(struct iommu_table *tbl, long index, long npages) { - unsigned int *dp; + unsigned int *dp, *orig_dp; + long orig_npages = npages; /* We don't worry about flushing the TLB cache. The only drawback of * not doing it is that we won't catch buggy device drivers doing @@ -212,57 +217,87 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) DBG("dart: free at: %lx, %lx\n", index, npages); - dp = ((unsigned int *)tbl->it_base) + index; + orig_dp = dp = ((unsigned int *)tbl->it_base) + index; while (npages--) *(dp++) = dart_emptyval; -} + dart_cache_sync(orig_dp, orig_npages); +} -static int __init dart_init(struct device_node *dart_node) +static void __init allocate_dart(void) { - unsigned int i; - unsigned long tmp, base, size; - struct resource r; - - if (dart_tablebase == 0 || dart_tablesize == 0) { - printk(KERN_INFO "DART: table not allocated, using " - "direct DMA\n"); - return -ENODEV; - } + unsigned long tmp; - if (of_address_to_resource(dart_node, 0, &r)) - panic("DART: can't get register base ! "); + /* 512 pages (2MB) is max DART tablesize. */ + dart_tablesize = 1UL << 21; - /* Make sure nothing from the DART range remains in the CPU cache - * from a previous mapping that existed before the kernel took - * over + /* + * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we + * will blow up an entire large page anyway in the kernel mapping. */ - flush_dcache_phys_range(dart_tablebase, - dart_tablebase + dart_tablesize); + dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M, + MEMBLOCK_LOW_LIMIT, SZ_2G, + NUMA_NO_NODE); + if (!dart_tablebase) + panic("Failed to allocate 16MB below 2GB for DART table\n"); /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data */ - tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); + tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); + if (!tmp) + panic("DART: table allocation failed\n"); + dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); + printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase); +} + +static int __init dart_init(struct device_node *dart_node) +{ + unsigned int i; + unsigned long base, size; + struct resource r; + + /* IOMMU disabled by the user ? bail out */ + if (iommu_is_off) + return -ENODEV; + + /* + * Only use the DART if the machine has more than 1GB of RAM + * or if requested with iommu=on on cmdline. + * + * 1GB of RAM is picked as limit because some default devices + * (i.e. Airport Extreme) have 30 bit address range limits. + */ + + if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) + return -ENODEV; + + /* Get DART registers */ + if (of_address_to_resource(dart_node, 0, &r)) + panic("DART: can't get register base ! "); + /* Map in DART registers */ dart = ioremap(r.start, resource_size(&r)); if (dart == NULL) panic("DART: Cannot map registers!"); - /* Map in DART table */ - dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize); + /* Allocate the DART and dummy page */ + allocate_dart(); /* Fill initial table */ for (i = 0; i < dart_tablesize/4; i++) - dart_vbase[i] = dart_emptyval; + dart_tablebase[i] = dart_emptyval; + + /* Push to memory */ + dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); /* Initialize DART with table base and enable it. */ - base = dart_tablebase >> DART_PAGE_SHIFT; + base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT; size = dart_tablesize >> DART_PAGE_SHIFT; if (dart_is_u4) { size &= DART_SIZE_U4_SIZE_MASK; @@ -286,18 +321,27 @@ static int __init dart_init(struct device_node *dart_node) return 0; } +static struct iommu_table_ops iommu_dart_ops = { + .set = dart_build, + .clear = dart_free, + .flush = dart_flush, +}; + static void iommu_table_dart_setup(void) { iommu_table_dart.it_busno = 0; iommu_table_dart.it_offset = 0; /* it_size is in number of entries */ iommu_table_dart.it_size = dart_tablesize / sizeof(u32); + iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K; /* Initialize the common IOMMU code */ - iommu_table_dart.it_base = (unsigned long)dart_vbase; + iommu_table_dart.it_base = (unsigned long)dart_tablebase; iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; - iommu_init_table(&iommu_table_dart, -1); + iommu_table_dart.it_ops = &iommu_dart_ops; + if (!iommu_init_table(&iommu_table_dart, -1, 0, 0)) + panic("Failed to initialize iommu table"); /* Reserve the last page of the DART to avoid possible prefetch * past the DART mapped area @@ -305,22 +349,6 @@ static void iommu_table_dart_setup(void) set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map); } -static void dma_dev_setup_dart(struct device *dev) -{ - /* We only have one iommu table on the mac for now, which makes - * things simple. Setup all PCI devices to point to this table - */ - if (get_dma_ops(dev) == &dma_direct_ops) - set_dma_offset(dev, DART_U4_BYPASS_BASE); - else - set_iommu_table_base(dev, &iommu_table_dart); -} - -static void pci_dma_dev_setup_dart(struct pci_dev *dev) -{ - dma_dev_setup_dart(&dev->dev); -} - static void pci_dma_bus_setup_dart(struct pci_bus *bus) { if (!iommu_table_dart_inited) { @@ -344,31 +372,21 @@ static bool dart_device_on_pcie(struct device *dev) return false; } -static int dart_dma_set_mask(struct device *dev, u64 dma_mask) +static void pci_dma_dev_setup_dart(struct pci_dev *dev) { - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - - /* U4 supports a DART bypass, we use it for 64-bit capable - * devices to improve performances. However, that only works - * for devices connected to U4 own PCIe interface, not bridged - * through hypertransport. We need the device to support at - * least 40 bits of addresses. - */ - if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) { - dev_info(dev, "Using 64-bit DMA iommu bypass\n"); - set_dma_ops(dev, &dma_direct_ops); - } else { - dev_info(dev, "Using 32-bit DMA via iommu\n"); - set_dma_ops(dev, &dma_iommu_ops); - } - dma_dev_setup_dart(dev); + if (dart_is_u4 && dart_device_on_pcie(&dev->dev)) + dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE; + set_iommu_table_base(&dev->dev, &iommu_table_dart); +} - *dev->dma_mask = dma_mask; - return 0; +static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask) +{ + return dart_is_u4 && + dart_device_on_pcie(&dev->dev) && + mask >= DMA_BIT_MASK(40); } -void __init iommu_init_early_dart(void) +void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) { struct device_node *dn; @@ -382,99 +400,41 @@ void __init iommu_init_early_dart(void) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) - goto bail; - - /* Setup low level TCE operations for the core IOMMU code */ - ppc_md.tce_build = dart_build; - ppc_md.tce_free = dart_free; - ppc_md.tce_flush = dart_flush; - - /* Setup bypass if supported */ - if (dart_is_u4) - ppc_md.dma_set_mask = dart_dma_set_mask; - - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + if (dart_init(dn) != 0) { + of_node_put(dn); + return; + } + /* + * U4 supports a DART bypass, we use it for 64-bit capable devices to + * improve performance. However, that only works for devices connected + * to the U4 own PCIe interface, not bridged through hypertransport. + * We need the device to support at least 40 bits of addresses. + */ + controller_ops->dma_dev_setup = pci_dma_dev_setup_dart; + controller_ops->dma_bus_setup = pci_dma_bus_setup_dart; + controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart; /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); - return; - - bail: - /* If init failed, use direct iommu and null setup functions */ - ppc_md.pci_dma_dev_setup = NULL; - ppc_md.pci_dma_bus_setup = NULL; - - /* Setup pci_dma ops */ - set_pci_dma_ops(&dma_direct_ops); + of_node_put(dn); } #ifdef CONFIG_PM -static void iommu_dart_save(void) -{ - memcpy(dart_copy, dart_vbase, 2*1024*1024); -} - static void iommu_dart_restore(void) { - memcpy(dart_vbase, dart_copy, 2*1024*1024); + dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); dart_tlb_invalidate_all(); } static int __init iommu_init_late_dart(void) { - unsigned long tbasepfn; - struct page *p; - - /* if no dart table exists then we won't need to save it - * and the area has also not been reserved */ if (!dart_tablebase) return 0; - tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT; - register_nosave_region_late(tbasepfn, - tbasepfn + ((1<<24) >> PAGE_SHIFT)); - - /* For suspend we need to copy the dart contents because - * it is not part of the regular mapping (see above) and - * thus not saved automatically. The memory for this copy - * must be allocated early because we need 2 MB. */ - p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT); - BUG_ON(!p); - dart_copy = page_address(p); - - ppc_md.iommu_save = iommu_dart_save; ppc_md.iommu_restore = iommu_dart_restore; return 0; } late_initcall(iommu_init_late_dart); -#endif - -void __init alloc_dart_table(void) -{ - /* Only reserve DART space if machine has more than 1GB of RAM - * or if requested with iommu=on on cmdline. - * - * 1GB of RAM is picked as limit because some default devices - * (i.e. Airport Extreme) have 30 bit address range limits. - */ - - if (iommu_is_off) - return; - - if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) - return; - - /* 512 pages (2MB) is max DART tablesize. */ - dart_tablesize = 1UL << 21; - /* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we - * will blow up an entire large page anyway in the kernel mapping - */ - dart_tablebase = (unsigned long) - __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); - - printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); -} +#endif /* CONFIG_PM */ |
