Diffstat (limited to 'arch/arc/mm/dma.c')
-rw-r--r--  arch/arc/mm/dma.c | 250
1 file changed, 61 insertions(+), 189 deletions(-)
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 71d3efff99d3..6b85e94f3275 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -1,78 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * DMA Coherent API Notes
- *
- * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
- * implemented by accessing it using a kernel virtual address, with
- * Cache bit off in the TLB entry.
- *
- * The default DMA address == Phy address which is 0x8000_0000 based.
  */
 
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
 #include <asm/cache.h>
 #include <asm/cacheflush.h>
 
+/*
+ * ARCH specific callbacks for generic noncoherent DMA ops
+ *  - hardware IOC not available (or "dma-coherent" not set for device in DT)
+ *  - But still handle both coherent and non-coherent requests from caller
+ *
+ * For DMA coherent hardware (IOC) generic code suffices
+ */
 
-static void *arc_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-	unsigned long order = get_order(size);
-	struct page *page;
-	phys_addr_t paddr;
-	void *kvaddr;
-	int need_coh = 1, need_kvaddr = 0;
-
-	page = alloc_pages(gfp, order);
-	if (!page)
-		return NULL;
-
-	/*
-	 * IOC relies on all data (even coherent DMA data) being in cache
-	 * Thus allocate normal cached memory
-	 *
-	 * The gains with IOC are two pronged:
-	 *   -For streaming data, elides need for cache maintenance, saving
-	 *    cycles in flush code, and bus bandwidth as all the lines of a
-	 *    buffer need to be flushed out to memory
-	 *   -For coherent data, Read/Write to buffers terminate early in cache
-	 *    (vs. always going to memory - thus are faster)
-	 */
-	if ((is_isa_arcv2() && ioc_enable) ||
-	    (attrs & DMA_ATTR_NON_CONSISTENT))
-		need_coh = 0;
-
-	/*
-	 * - A coherent buffer needs MMU mapping to enforce non-cachability
-	 * - A highmem page needs a virtual handle (hence MMU mapping)
-	 *   independent of cachability
-	 */
-	if (PageHighMem(page) || need_coh)
-		need_kvaddr = 1;
-
-	/* This is linear addr (0x8000_0000 based) */
-	paddr = page_to_phys(page);
-
-	*dma_handle = plat_phys_to_dma(dev, paddr);
-
-	/* This is kernel Virtual address (0x7000_0000 based) */
-	if (need_kvaddr) {
-		kvaddr = ioremap_nocache(paddr, size);
-		if (kvaddr == NULL) {
-			__free_pages(page, order);
-			return NULL;
-		}
-	} else {
-		kvaddr = (void *)(u32)paddr;
-	}
-
 	/*
 	 * Evict any existing L1 and/or L2 lines for the backing page
 	 * in case it was used earlier as a normal "cached" page.
@@ -83,151 +27,79 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
 	 * Currently flush_cache_vmap nukes the L1 cache completely which
 	 * will be optimized as a separate commit
 	 */
-	if (need_coh)
-		dma_cache_wback_inv(paddr, size);
-
-	return kvaddr;
-}
-
-static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, unsigned long attrs)
-{
-	phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle);
-	struct page *page = virt_to_page(paddr);
-	int is_non_coh = 1;
-
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-			(is_isa_arcv2() && ioc_enable);
-
-	if (PageHighMem(page) || !is_non_coh)
-		iounmap((void __force __iomem *)vaddr);
-
-	__free_pages(page, get_order(size));
-}
-
-static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-			void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			unsigned long attrs)
-{
-	unsigned long user_count = vma_pages(vma);
-	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr));
-	unsigned long off = vma->vm_pgoff;
-	int ret = -ENXIO;
-
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-		return ret;
-
-	if (off < count && user_count <= (count - off)) {
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      pfn + off,
-				      user_count << PAGE_SHIFT,
-				      vma->vm_page_prot);
-	}
-
-	return ret;
+	dma_cache_wback_inv(page_to_phys(page), size);
 }
 
 /*
- * streaming DMA Mapping API...
- * CPU accesses page via normal paddr, thus needs to explicitly made
- * consistent before each use
+ * Cache operations depending on function and direction argument, inspired by
+ * https://lore.kernel.org/lkml/20180518175004.GF17671@n2100.armlinux.org.uk
+ * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
+ * dma-mapping: provide a generic dma-noncoherent implementation)"
+ *
+ *          |   map          ==  for_device     |   unmap     ==  for_cpu
+ *          |----------------------------------------------------------------
+ * TO_DEV   |   writeback        writeback      |   none          none
+ * FROM_DEV |   invalidate       invalidate     |   invalidate*   invalidate*
+ * BIDIR    |   writeback+inv    writeback+inv  |   invalidate    invalidate
+ *
+ *     [*] needed for CPU speculative prefetches
+ *
+ * NOTE: we don't check the validity of direction argument as it is done in
+ * upper layer functions (in include/linux/dma-mapping.h)
  */
-static void _dma_cache_sync(phys_addr_t paddr, size_t size,
+
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 		enum dma_data_direction dir)
 {
 	switch (dir) {
-	case DMA_FROM_DEVICE:
-		dma_cache_inv(paddr, size);
-		break;
 	case DMA_TO_DEVICE:
 		dma_cache_wback(paddr, size);
 		break;
+
+	case DMA_FROM_DEVICE:
+		dma_cache_inv(paddr, size);
+		break;
+
 	case DMA_BIDIRECTIONAL:
 		dma_cache_wback_inv(paddr, size);
 		break;
+
 	default:
-		pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr);
+		break;
 	}
 }
 
-static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	phys_addr_t paddr = page_to_phys(page) + offset;
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		_dma_cache_sync(paddr, size, dir);
-
-	return plat_phys_to_dma(dev, paddr);
-}
-
-static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nents, i)
-		s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
-					       s->length, dir);
-
-	return nents;
-}
-
-static void arc_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-	_dma_cache_sync(plat_dma_to_phys(dev, dma_handle), size, DMA_FROM_DEVICE);
-}
-
-static void arc_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-	_dma_cache_sync(plat_dma_to_phys(dev, dma_handle), size, DMA_TO_DEVICE);
-}
-
-static void arc_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sglist, int nelems,
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 		enum dma_data_direction dir)
 {
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sglist, sg, nelems, i)
-		_dma_cache_sync(sg_phys(sg), sg->length, dir);
-}
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		break;
 
-static void arc_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sglist, int nelems,
-		enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
+	/* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
+	case DMA_FROM_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		dma_cache_inv(paddr, size);
+		break;
 
-	for_each_sg(sglist, sg, nelems, i)
-		_dma_cache_sync(sg_phys(sg), sg->length, dir);
+	default:
+		break;
+	}
 }
 
-static int arc_dma_supported(struct device *dev, u64 dma_mask)
+/*
+ * Plug in direct dma map ops.
+ */
+void arch_setup_dma_ops(struct device *dev, bool coherent)
 {
-	/* Support 32 bit DMA mask exclusively */
-	return dma_mask == DMA_BIT_MASK(32);
-}
+	/*
+	 * IOC hardware snoops all DMA traffic keeping the caches consistent
	 * with memory - eliding need for any explicit cache maintenance of
+	 * DMA buffers.
+	 */
+	if (is_isa_arcv2() && ioc_enable && coherent)
+		dev->dma_coherent = true;
 
-const struct dma_map_ops arc_dma_ops = {
-	.alloc			= arc_dma_alloc,
-	.free			= arc_dma_free,
-	.mmap			= arc_dma_mmap,
-	.map_page		= arc_dma_map_page,
-	.map_sg			= arc_dma_map_sg,
-	.sync_single_for_device	= arc_dma_sync_single_for_device,
-	.sync_single_for_cpu	= arc_dma_sync_single_for_cpu,
-	.sync_sg_for_cpu	= arc_dma_sync_sg_for_cpu,
-	.sync_sg_for_device	= arc_dma_sync_sg_for_device,
-	.dma_supported		= arc_dma_supported,
-};
-EXPORT_SYMBOL(arc_dma_ops);
+	dev_info(dev, "use %scoherent DMA ops\n",
+		 dev->dma_coherent ? "" : "non");
 }
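
Note (not part of the diff): the consumer-facing API is unchanged by this conversion. A minimal, hypothetical driver fragment, with the device pointer and ring size purely illustrative, still allocates coherent memory the usual way; on a non-coherent ARC device the generic allocator now calls arch_dma_prep_coherent() above to evict stale lines for the backing page before handing out an uncached mapping.

    #include <linux/dma-mapping.h>

    /* Hypothetical helper; 'dev' and the ring size are illustrative only */
    static void *example_alloc_ring(struct device *dev, size_t size,
                                    dma_addr_t *dma_handle)
    {
            /*
             * Ends up in the generic DMA allocator; for a non-coherent ARC
             * device this flushes+invalidates the backing page via
             * arch_dma_prep_coherent() and returns an uncached mapping.
             */
            return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
    }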
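
The map/unmap table in the new comment corresponds to the streaming DMA calls a driver already makes. A sketch of a device-to-memory (DMA_FROM_DEVICE) transfer, with the device handoff elided and all names hypothetical: dma_map_single() ends up in arch_sync_dma_for_device() (cache invalidate), and dma_unmap_single() ends up in arch_sync_dma_for_cpu() (invalidate again, guarding against speculative prefetch while the device owned the buffer).

    #include <linux/dma-mapping.h>

    /* Hypothetical receive path on a non-coherent ARC device */
    static int example_rx(struct device *dev, void *buf, size_t len)
    {
            dma_addr_t dma;

            /* map == for_device: FROM_DEV row -> dma_cache_inv() */
            dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
            if (dma_mapping_error(dev, dma))
                    return -ENOMEM;

            /* ... program the device with 'dma' and wait for completion ... */

            /* unmap == for_cpu: FROM_DEV row -> dma_cache_inv() again */
            dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);

            return 0;       /* CPU can now safely read 'buf' */
    }

A DMA_TO_DEVICE mapping follows the first row of the table instead: writeback on map, nothing on unmap.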
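
Whether those cache operations run at all is a per-device decision made in arch_setup_dma_ops(): with ARCv2 IOC enabled and the device marked coherent (e.g. "dma-coherent" in DT), dev->dma_coherent is set and the generic dma-direct code skips the arch_sync_dma_*() hooks. Roughly, as a simplified sketch of that gating rather than the mainline code:

    #include <linux/dma-map-ops.h>

    /* Simplified illustration of how dev->dma_coherent elides maintenance */
    static void example_sync_for_device(struct device *dev, phys_addr_t paddr,
                                        size_t size, enum dma_data_direction dir)
    {
            if (dev_is_dma_coherent(dev))
                    return;         /* IOC snoops DMA traffic: nothing to do */

            arch_sync_dma_for_device(paddr, size, dir);
    }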
