diff options
Diffstat (limited to 'kernel/dma')
-rw-r--r-- | kernel/dma/Kconfig | 121 | ||||
-rw-r--r-- | kernel/dma/Makefile | 8 | ||||
-rw-r--r-- | kernel/dma/coherent.c | 203 | ||||
-rw-r--r-- | kernel/dma/contiguous.c | 249 | ||||
-rw-r--r-- | kernel/dma/debug.c | 298 | ||||
-rw-r--r-- | kernel/dma/debug.h | 130 | ||||
-rw-r--r-- | kernel/dma/direct.c | 605 | ||||
-rw-r--r-- | kernel/dma/direct.h | 128 | ||||
-rw-r--r-- | kernel/dma/dummy.c | 5 | ||||
-rw-r--r-- | kernel/dma/map_benchmark.c | 358 | ||||
-rw-r--r-- | kernel/dma/mapping.c | 586 | ||||
-rw-r--r-- | kernel/dma/ops_helpers.c | 93 | ||||
-rw-r--r-- | kernel/dma/pool.c | 165 | ||||
-rw-r--r-- | kernel/dma/remap.c | 7 | ||||
-rw-r--r-- | kernel/dma/swiotlb.c | 1949 | ||||
-rw-r--r-- | kernel/dma/virt.c | 59 |
16 files changed, 3762 insertions, 1202 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 1da3f44f2565..d62f5957f36b 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -1,10 +1,32 @@ # SPDX-License-Identifier: GPL-2.0-only +config NO_DMA + bool + config HAS_DMA bool depends on !NO_DMA default y +config DMA_OPS + depends on HAS_DMA + bool + +# +# IOMMU drivers that can bypass the IOMMU code and optionally use the direct +# mapping fast path should select this option and set the dma_ops_bypass +# flag in struct device where applicable +# +config DMA_OPS_BYPASS + bool + +# Lets platform IOMMU driver choose between bypass and IOMMU +config ARCH_HAS_DMA_MAP_DIRECT + bool + +config NEED_SG_DMA_FLAGS + bool + config NEED_SG_DMA_LENGTH bool @@ -14,21 +36,24 @@ config NEED_DMA_MAP_STATE config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT || PHYS_ADDR_T_64BIT -config ARCH_HAS_DMA_COHERENCE_H - bool - config ARCH_HAS_DMA_SET_MASK bool # # Select this option if the architecture needs special handling for # DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what -# people thing of when saying write combine, so very few platforms should +# people think of when saying write combine, so very few platforms should # need to enable this. # config ARCH_HAS_DMA_WRITE_COMBINE bool +# +# Select if the architectures provides the arch_dma_mark_clean hook +# +config ARCH_HAS_DMA_MARK_CLEAN + bool + config DMA_DECLARE_COHERENT bool @@ -54,17 +79,47 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_NONCOHERENT_CACHE_SYNC - bool - -config DMA_VIRT_OPS +# +# Select this option if the architecture assumes DMA devices are coherent +# by default. +# +config ARCH_DMA_DEFAULT_COHERENT bool - depends on HAS_DMA config SWIOTLB bool select NEED_DMA_MAP_STATE +config SWIOTLB_DYNAMIC + bool "Dynamic allocation of DMA bounce buffers" + default n + depends on SWIOTLB + help + This enables dynamic resizing of the software IO TLB. The kernel + starts with one memory pool at boot and it will allocate additional + pools as needed. To reduce run-time kernel memory requirements, you + may have to specify a smaller size of the initial pool using + "swiotlb=" on the kernel command line. + + If unsure, say N. + +config DMA_BOUNCE_UNALIGNED_KMALLOC + bool + depends on SWIOTLB + +config DMA_RESTRICTED_POOL + bool "DMA Restricted Pool" + depends on OF && OF_RESERVED_MEM && SWIOTLB + help + This enables support for restricted DMA pools which provide a level of + DMA memory protection on systems with limited hardware protection + capabilities, such as those lacking an IOMMU. + + For more information see + <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt> + and <kernel/dma/swiotlb.c>. + If unsure, say "n". + # # Should be selected if we can mmap non-coherent mappings to userspace. # The only thing that is really required is a way to set an uncached bit @@ -78,15 +133,25 @@ config DMA_COHERENT_POOL select GENERIC_ALLOCATOR bool -config DMA_REMAP +config DMA_GLOBAL_POOL + select DMA_DECLARE_COHERENT + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP bool - depends on MMU - select DMA_NONCOHERENT_MMAP config DMA_DIRECT_REMAP bool - select DMA_REMAP select DMA_COHERENT_POOL + select DMA_NONCOHERENT_MMAP + +# +# Fallback to arch code for DMA allocations. This should eventually go away. +# +config ARCH_HAS_DMA_ALLOC + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP + depends on !DMA_GLOBAL_POOL + bool config DMA_CMA bool "DMA Contiguous Memory Allocator" @@ -99,10 +164,22 @@ config DMA_CMA You can disable CMA by specifying "cma=0" on the kernel's command line. - For more information see <include/linux/dma-contiguous.h>. + For more information see <kernel/dma/contiguous.c>. If unsure, say "n". if DMA_CMA + +config DMA_NUMA_CMA + bool "Enable separate DMA Contiguous Memory Area for NUMA Node" + depends on NUMA + help + Enable this option to get numa CMA areas so that NUMA devices + can get local memory by DMA coherent APIs. + + You can set the size of pernuma CMA by specifying "cma_pernuma=size" + or set the node id and its size of CMA by specifying "numa_cma= + <node>:size[,<node>:size]" on the kernel's command line. + comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES @@ -147,7 +224,7 @@ endchoice config CMA_ALIGNMENT int "Maximum PAGE_SIZE order of alignment for contiguous buffers" - range 4 12 + range 2 12 default 8 help DMA mapping framework by default aligns all buffers to the smallest @@ -174,11 +251,6 @@ config DMA_API_DEBUG drivers like double-freeing of DMA mappings or freeing mappings that were never allocated. - This also attempts to catch cases where a page owned by DMA is - accessed by the cpu in a way that could cause data corruption. For - example, this enables cow_user_page() to check that the source page is - not undergoing DMA. - This option causes a performance degradation. Use only if you want to debug device drivers and dma interactions. @@ -200,3 +272,12 @@ config DMA_API_DEBUG_SG is technically out-of-spec. If unsure, say N. + +config DMA_MAP_BENCHMARK + bool "Enable benchmarking of streaming DMA mapping" + depends on DEBUG_FS + help + Provides /sys/kernel/debug/dma_map_benchmark that helps with testing + performance of dma_(un)map_page. + + See tools/testing/selftests/dma/dma_map_benchmark.c diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 370f63344e9c..21926e46ef4f 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,10 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_HAS_DMA) += mapping.o direct.o dummy.o +obj-$(CONFIG_HAS_DMA) += mapping.o direct.o +obj-$(CONFIG_DMA_OPS) += ops_helpers.o +obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += virt.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o -obj-$(CONFIG_DMA_REMAP) += remap.o +obj-$(CONFIG_MMU) += remap.o +obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 2a0c4985f38e..ff5683a57f77 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -7,7 +7,8 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> +#include <linux/dma-map-ops.h> struct dma_coherent_mem { void *virt_base; @@ -19,8 +20,6 @@ struct dma_coherent_mem { bool use_dev_dma_pfn_offset; }; -static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init; - static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev) { if (dev && dev->dma_mem) @@ -32,65 +31,56 @@ static inline dma_addr_t dma_get_device_base(struct device *dev, struct dma_coherent_mem * mem) { if (mem->use_dev_dma_pfn_offset) - return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT; - else - return mem->device_base; + return phys_to_dma(dev, PFN_PHYS(mem->pfn_base)); + return mem->device_base; } -static int dma_init_coherent_memory(phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size, - struct dma_coherent_mem **mem) +static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size, bool use_dma_pfn_offset) { - struct dma_coherent_mem *dma_mem = NULL; - void *mem_base = NULL; + struct dma_coherent_mem *dma_mem; int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - int ret; + void *mem_base; - if (!size) { - ret = -EINVAL; - goto out; - } + if (!size) + return ERR_PTR(-EINVAL); mem_base = memremap(phys_addr, size, MEMREMAP_WC); - if (!mem_base) { - ret = -EINVAL; - goto out; - } + if (!mem_base) + return ERR_PTR(-EINVAL); + dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dma_mem) { - ret = -ENOMEM; - goto out; - } - dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dma_mem->bitmap) { - ret = -ENOMEM; - goto out; - } + if (!dma_mem) + goto out_unmap_membase; + dma_mem->bitmap = bitmap_zalloc(pages, GFP_KERNEL); + if (!dma_mem->bitmap) + goto out_free_dma_mem; dma_mem->virt_base = mem_base; dma_mem->device_base = device_addr; dma_mem->pfn_base = PFN_DOWN(phys_addr); dma_mem->size = pages; + dma_mem->use_dev_dma_pfn_offset = use_dma_pfn_offset; spin_lock_init(&dma_mem->spinlock); - *mem = dma_mem; - return 0; + return dma_mem; -out: +out_free_dma_mem: kfree(dma_mem); - if (mem_base) - memunmap(mem_base); - return ret; +out_unmap_membase: + memunmap(mem_base); + pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %zd MiB\n", + &phys_addr, size / SZ_1M); + return ERR_PTR(-ENOMEM); } -static void dma_release_coherent_memory(struct dma_coherent_mem *mem) +static void _dma_release_coherent_memory(struct dma_coherent_mem *mem) { if (!mem) return; memunmap(mem->virt_base); - kfree(mem->bitmap); + bitmap_free(mem->bitmap); kfree(mem); } @@ -107,22 +97,47 @@ static int dma_assign_coherent_memory(struct device *dev, return 0; } +/* + * Declare a region of memory to be handed out by dma_alloc_coherent() when it + * is asked for coherent memory for this device. This shall only be used + * from platform code, usually based on the device tree description. + * + * phys_addr is the CPU physical address to which the memory is currently + * assigned (this will be ioremapped so the CPU can access the region). + * + * device_addr is the DMA address the device needs to be programmed with to + * actually address this memory (this will be handed out as the dma_addr_t in + * dma_alloc_coherent()). + * + * size is the size of the area (must be a multiple of PAGE_SIZE). + * + * As a simplification for the platforms, only *one* such region of memory may + * be declared per device. + */ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) { struct dma_coherent_mem *mem; int ret; - ret = dma_init_coherent_memory(phys_addr, device_addr, size, &mem); - if (ret) - return ret; + mem = dma_init_coherent_memory(phys_addr, device_addr, size, false); + if (IS_ERR(mem)) + return PTR_ERR(mem); ret = dma_assign_coherent_memory(dev, mem); if (ret) - dma_release_coherent_memory(mem); + _dma_release_coherent_memory(mem); return ret; } +void dma_release_coherent_memory(struct device *dev) +{ + if (dev) { + _dma_release_coherent_memory(dev->dma_mem); + dev->dma_mem = NULL; + } +} + static void *__dma_alloc_from_coherent(struct device *dev, struct dma_coherent_mem *mem, ssize_t size, dma_addr_t *dma_handle) @@ -181,16 +196,6 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, return 1; } -void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle) -{ - if (!dma_coherent_default_memory) - return NULL; - - return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size, - dma_handle); -} - static int __dma_release_from_coherent(struct dma_coherent_mem *mem, int order, void *vaddr) { @@ -226,15 +231,6 @@ int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) return __dma_release_from_coherent(mem, order, vaddr); } -int dma_release_from_global_coherent(int order, void *vaddr) -{ - if (!dma_coherent_default_memory) - return 0; - - return __dma_release_from_coherent(dma_coherent_default_memory, order, - vaddr); -} - static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) { @@ -280,6 +276,28 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); } +#ifdef CONFIG_DMA_GLOBAL_POOL +static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init; + +void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle) +{ + if (!dma_coherent_default_memory) + return NULL; + + return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size, + dma_handle); +} + +int dma_release_from_global_coherent(int order, void *vaddr) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_release_from_coherent(dma_coherent_default_memory, order, + vaddr); +} + int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) { @@ -290,6 +308,19 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, vaddr, size, ret); } +int dma_init_global_coherent(phys_addr_t phys_addr, size_t size) +{ + struct dma_coherent_mem *mem; + + mem = dma_init_coherent_memory(phys_addr, phys_addr, size, true); + if (IS_ERR(mem)) + return PTR_ERR(mem); + dma_coherent_default_memory = mem; + pr_info("DMA: default coherent area is set\n"); + return 0; +} +#endif /* CONFIG_DMA_GLOBAL_POOL */ + /* * Support for reserved memory regions defined in device tree */ @@ -298,25 +329,22 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, #include <linux/of_fdt.h> #include <linux/of_reserved_mem.h> +#ifdef CONFIG_DMA_GLOBAL_POOL static struct reserved_mem *dma_reserved_default_memory __initdata; +#endif static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - struct dma_coherent_mem *mem = rmem->priv; - int ret; - - if (!mem) { - ret = dma_init_coherent_memory(rmem->base, rmem->base, - rmem->size, &mem); - if (ret) { - pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n", - &rmem->base, (unsigned long)rmem->size / SZ_1M); - return ret; - } + if (!rmem->priv) { + struct dma_coherent_mem *mem; + + mem = dma_init_coherent_memory(rmem->base, rmem->base, + rmem->size, true); + if (IS_ERR(mem)) + return PTR_ERR(mem); + rmem->priv = mem; } - mem->use_dev_dma_pfn_offset = true; - rmem->priv = mem; - dma_assign_coherent_memory(dev, mem); + dma_assign_coherent_memory(dev, rmem->priv); return 0; } @@ -344,7 +372,9 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) pr_err("Reserved memory: regions without no-map are not yet supported\n"); return -EINVAL; } +#endif +#ifdef CONFIG_DMA_GLOBAL_POOL if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) { WARN(dma_reserved_default_memory, "Reserved memory: region for default DMA coherent area is redefined\n"); @@ -358,31 +388,16 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) return 0; } +#ifdef CONFIG_DMA_GLOBAL_POOL static int __init dma_init_reserved_memory(void) { - const struct reserved_mem_ops *ops; - int ret; - if (!dma_reserved_default_memory) return -ENOMEM; - - ops = dma_reserved_default_memory->ops; - - /* - * We rely on rmem_dma_device_init() does not propagate error of - * dma_assign_coherent_memory() for "NULL" device. - */ - ret = ops->device_init(dma_reserved_default_memory, NULL); - - if (!ret) { - dma_coherent_default_memory = dma_reserved_default_memory->priv; - pr_info("DMA: default coherent area is set\n"); - } - - return ret; + return dma_init_global_coherent(dma_reserved_default_memory->base, + dma_reserved_default_memory->size); } - core_initcall(dma_init_reserved_memory); +#endif /* CONFIG_DMA_GLOBAL_POOL */ RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); #endif diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 15bc5026c485..055da410ac71 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -5,24 +5,46 @@ * Written by: * Marek Szyprowski <m.szyprowski@samsung.com> * Michal Nazarewicz <mina86@mina86.com> + * + * Contiguous Memory Allocator + * + * The Contiguous Memory Allocator (CMA) makes it possible to + * allocate big contiguous chunks of memory after the system has + * booted. + * + * Why is it needed? + * + * Various devices on embedded systems have no scatter-getter and/or + * IO map support and require contiguous blocks of memory to + * operate. They include devices such as cameras, hardware video + * coders, etc. + * + * Such devices often require big memory buffers (a full HD frame + * is, for instance, more than 2 mega pixels large, i.e. more than 6 + * MB of memory), which makes mechanisms such as kmalloc() or + * alloc_page() ineffective. + * + * At the same time, a solution where a big memory region is + * reserved for a device is suboptimal since often more memory is + * reserved then strictly required and, moreover, the memory is + * inaccessible to page system even if device drivers don't use it. + * + * CMA tries to solve this issue by operating on memory regions + * where only movable pages can be allocated from. This way, kernel + * can use the memory for pagecache and when device driver requests + * it, allocated pages can be migrated. */ #define pr_fmt(fmt) "cma: " fmt -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - #include <asm/page.h> -#include <asm/dma-contiguous.h> #include <linux/memblock.h> #include <linux/err.h> #include <linux/sizes.h> -#include <linux/dma-contiguous.h> +#include <linux/dma-map-ops.h> #include <linux/cma.h> +#include <linux/nospec.h> #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES @@ -69,20 +91,57 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); +#ifdef CONFIG_DMA_NUMA_CMA + +static struct cma *dma_contiguous_numa_area[MAX_NUMNODES]; +static phys_addr_t numa_cma_size[MAX_NUMNODES] __initdata; +static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES]; +static phys_addr_t pernuma_size_bytes __initdata; + +static int __init early_numa_cma(char *p) +{ + int nid, count = 0; + unsigned long tmp; + char *s = p; + + while (*s) { + if (sscanf(s, "%lu%n", &tmp, &count) != 1) + break; + + if (s[count] == ':') { + if (tmp >= MAX_NUMNODES) + break; + nid = array_index_nospec(tmp, MAX_NUMNODES); + + s += count + 1; + tmp = memparse(s, &s); + numa_cma_size[nid] = tmp; + + if (*s == ',') + s++; + else + break; + } else + break; + } + + return 0; +} +early_param("numa_cma", early_numa_cma); + +static int __init early_cma_pernuma(char *p) +{ + pernuma_size_bytes = memparse(p, &p); + return 0; +} +early_param("cma_pernuma", early_cma_pernuma); +#endif + #ifdef CONFIG_CMA_SIZE_PERCENTAGE static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) { - struct memblock_region *reg; - unsigned long total_pages = 0; - - /* - * We cannot use memblock_phys_mem_size() here, because - * memblock_analyze() has not been called yet. - */ - for_each_memblock(memory, reg) - total_pages += memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); + unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size()); return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; } @@ -96,6 +155,51 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif +#ifdef CONFIG_DMA_NUMA_CMA +static void __init dma_numa_cma_reserve(void) +{ + int nid; + + for_each_node(nid) { + int ret; + char name[CMA_MAX_NAME]; + struct cma **cma; + + if (!node_online(nid)) { + if (pernuma_size_bytes || numa_cma_size[nid]) + pr_warn("invalid node %d specified\n", nid); + continue; + } + + if (pernuma_size_bytes) { + + cma = &dma_contiguous_pernuma_area[nid]; + snprintf(name, sizeof(name), "pernuma%d", nid); + ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, + 0, false, name, cma, nid); + if (ret) + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + } + + if (numa_cma_size[nid]) { + + cma = &dma_contiguous_numa_area[nid]; + snprintf(name, sizeof(name), "numa%d", nid); + ret = cma_declare_contiguous_nid(0, numa_cma_size[nid], 0, 0, 0, false, + name, cma, nid); + if (ret) + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + } + } +} +#else +static inline void __init dma_numa_cma_reserve(void) +{ +} +#endif + /** * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling * @limit: End address of the reserved memory (optional, 0 for any). @@ -112,6 +216,8 @@ void __init dma_contiguous_reserve(phys_addr_t limit) phys_addr_t selected_limit = limit; bool fixed = false; + dma_numa_cma_reserve(); + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); if (size_cmdline != -1) { @@ -143,6 +249,11 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } } +void __weak +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} + /** * dma_contiguous_reserve_area() - reserve custom contiguous area * @size: Size of the reserved area (in bytes), @@ -215,40 +326,65 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return cma_release(dev_get_cma_area(dev), pages, count); } +static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) +{ + unsigned int align = min(get_order(size), CONFIG_CMA_ALIGNMENT); + + return cma_alloc(cma, size >> PAGE_SHIFT, align, gfp & __GFP_NOWARN); +} + /** * dma_alloc_contiguous() - allocate contiguous pages * @dev: Pointer to device for which the allocation is performed. * @size: Requested allocation size. * @gfp: Allocation flags. * - * This function allocates contiguous memory buffer for specified device. It - * tries to use device specific contiguous memory area if available, or the - * default global one. + * tries to use device specific contiguous memory area if available, or it + * tries to use per-numa cma, if the allocation fails, it will fallback to + * try default global one. * - * Note that it byapss one-page size of allocations from the global area as - * the addresses within one page are always contiguous, so there is no need - * to waste CMA pages for that kind; it also helps reduce fragmentations. + * Note that it bypass one-page size of allocations from the per-numa and + * global area as the addresses within one page are always contiguous, so + * there is no need to waste CMA pages for that kind; it also helps reduce + * fragmentations. */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { - size_t count = size >> PAGE_SHIFT; - struct page *page = NULL; - struct cma *cma = NULL; - - if (dev && dev->cma_area) - cma = dev->cma_area; - else if (count > 1) - cma = dma_contiguous_default_area; +#ifdef CONFIG_DMA_NUMA_CMA + int nid = dev_to_node(dev); +#endif /* CMA can be used only in the context which permits sleeping */ - if (cma && gfpflags_allow_blocking(gfp)) { - size_t align = get_order(size); - size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); - - page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); + if (!gfpflags_allow_blocking(gfp)) + return NULL; + if (dev->cma_area) + return cma_alloc_aligned(dev->cma_area, size, gfp); + if (size <= PAGE_SIZE) + return NULL; + +#ifdef CONFIG_DMA_NUMA_CMA + if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) { + struct cma *cma = dma_contiguous_pernuma_area[nid]; + struct page *page; + + if (cma) { + page = cma_alloc_aligned(cma, size, gfp); + if (page) + return page; + } + + cma = dma_contiguous_numa_area[nid]; + if (cma) { + page = cma_alloc_aligned(cma, size, gfp); + if (page) + return page; + } } +#endif + if (!dma_contiguous_default_area) + return NULL; - return page; + return cma_alloc_aligned(dma_contiguous_default_area, size, gfp); } /** @@ -264,9 +400,30 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) */ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { - if (!cma_release(dev_get_cma_area(dev), page, - PAGE_ALIGN(size) >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + /* if dev has its own cma, free page from there */ + if (dev->cma_area) { + if (cma_release(dev->cma_area, page, count)) + return; + } else { + /* + * otherwise, page is from either per-numa cma or default cma + */ +#ifdef CONFIG_DMA_NUMA_CMA + if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)], + page, count)) + return; + if (cma_release(dma_contiguous_numa_area[page_to_nid(page)], + page, count)) + return; +#endif + if (cma_release(dma_contiguous_default_area, page, count)) + return; + } + + /* not in any cma, free from buddy */ + __free_pages(page, get_order(size)); } /* @@ -282,14 +439,14 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev) { - dev_set_cma_area(dev, rmem->priv); + dev->cma_area = rmem->priv; return 0; } static void rmem_cma_device_release(struct reserved_mem *rmem, struct device *dev) { - dev_set_cma_area(dev, NULL); + dev->cma_area = NULL; } static const struct reserved_mem_ops rmem_cma_ops = { @@ -299,8 +456,6 @@ static const struct reserved_mem_ops rmem_cma_ops = { static int __init rmem_cma_setup(struct reserved_mem *rmem) { - phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); - phys_addr_t mask = align - 1; unsigned long node = rmem->fdt_node; bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); struct cma *cma; @@ -316,7 +471,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; - if ((rmem->base & mask) || (rmem->size & mask)) { + if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) { pr_err("Reserved memory: incorrect alignment of CMA region\n"); return -EINVAL; } @@ -330,7 +485,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) dma_contiguous_early_fixup(rmem->base, rmem->size); if (default_cma) - dma_contiguous_set_default(cma); + dma_contiguous_default_area = cma; rmem->ops = &rmem_cma_ops; rmem->priv = cma; diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 36c962a86bf2..a6e3792b15f8 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -9,10 +9,9 @@ #include <linux/sched/task_stack.h> #include <linux/scatterlist.h> -#include <linux/dma-mapping.h> +#include <linux/dma-map-ops.h> #include <linux/sched/task.h> #include <linux/stacktrace.h> -#include <linux/dma-debug.h> #include <linux/spinlock.h> #include <linux/vmalloc.h> #include <linux/debugfs.h> @@ -24,8 +23,8 @@ #include <linux/ctype.h> #include <linux/list.h> #include <linux/slab.h> - #include <asm/sections.h> +#include "debug.h" #define HASH_SIZE 16384ULL #define HASH_FN_SHIFT 13 @@ -54,6 +53,7 @@ enum map_err_types { * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping * @list: node on pre-allocated free_entries list * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @dev_addr: dma address * @size: length of the mapping * @type: single, page, sg, coherent * @direction: enum dma_data_direction @@ -62,7 +62,8 @@ enum map_err_types { * @pfn: page frame of the start address * @offset: offset of mapping relative to pfn * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected + * @stack_len: number of backtrace entries in @stack_entries + * @stack_entries: stack of backtrace history */ struct dma_debug_entry { struct list_head list; @@ -139,13 +140,17 @@ static const char *const maperr2str[] = { static const char *type2name[] = { [dma_debug_single] = "single", - [dma_debug_sg] = "scather-gather", + [dma_debug_sg] = "scatter-gather", [dma_debug_coherent] = "coherent", [dma_debug_resource] = "resource", }; -static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", - "DMA_FROM_DEVICE", "DMA_NONE" }; +static const char *dir2name[] = { + [DMA_BIDIRECTIONAL] = "DMA_BIDIRECTIONAL", + [DMA_TO_DEVICE] = "DMA_TO_DEVICE", + [DMA_FROM_DEVICE] = "DMA_FROM_DEVICE", + [DMA_NONE] = "DMA_NONE", +}; /* * The access to some variables in this macro is racy. We can't use atomic_t @@ -347,11 +352,10 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, unsigned long *flags) { - unsigned int max_range = dma_get_max_seg_size(ref->dev); struct dma_debug_entry *entry, index = *ref; - unsigned int range = 0; + int limit = min(HASH_SIZE, (index.dev_addr >> HASH_FN_SHIFT) + 1); - while (range <= max_range) { + for (int i = 0; i < limit; i++) { entry = __hash_bucket_find(*bucket, ref, containing_match); if (entry) @@ -361,7 +365,6 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, * Nothing found, go back a hash bucket */ put_hash_bucket(*bucket, *flags); - range += (1 << HASH_FN_SHIFT); index.dev_addr -= (1 << HASH_FN_SHIFT); *bucket = get_hash_bucket(&index, flags); } @@ -395,37 +398,6 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry) } /* - * Dump mapping entries for debugging purposes - */ -void debug_dma_dump_mappings(struct device *dev) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!dev || dev == entry->dev) { - dev_info(entry->dev, - "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - } - - spin_unlock_irqrestore(&bucket->lock, flags); - cond_resched(); - } -} - -/* * For each mapping (initial cacheline in the case of * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a * scatterlist, or the cacheline specified in dma_map_single) insert @@ -444,11 +416,8 @@ void debug_dma_dump_mappings(struct device *dev) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. - * - * At any time debug_dma_assert_idle() can be called to trigger a - * warning if any cachelines in the given page are in the active set. */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) @@ -493,10 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln) overlap = active_cacheline_set_overlap(cln, ++overlap); /* If we overflowed the overlap counter then we're potentially - * leaking dma-mappings. Otherwise, if maps and unmaps are - * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the cacheline may be marked idle - * prematurely. + * leaking dma-mappings. */ WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"), @@ -551,58 +517,75 @@ static void active_cacheline_remove(struct dma_debug_entry *entry) spin_unlock_irqrestore(&radix_lock, flags); } -/** - * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_cacheline tree - * - * Place a call to this routine in cases where the cpu touching the page - * before the dma completes (page is dma_unmapped) will lead to data - * corruption. +/* + * Dump mappings entries on kernel space for debugging purposes */ -void debug_dma_assert_idle(struct page *page) +void debug_dma_dump_mappings(struct device *dev) { - static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; - struct dma_debug_entry *entry = NULL; - void **results = (void **) &ents; - unsigned int nents, i; - unsigned long flags; + int idx; phys_addr_t cln; - if (dma_debug_disabled()) - return; - - if (!page) - return; + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; - cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; - spin_lock_irqsave(&radix_lock, flags); - nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, - CACHELINES_PER_PAGE); - for (i = 0; i < nents; i++) { - phys_addr_t ent_cln = to_cacheline_number(ents[i]); + spin_lock_irqsave(&bucket->lock, flags); + list_for_each_entry(entry, &bucket->list, list) { + if (!dev || dev == entry->dev) { + cln = to_cacheline_number(entry); + dev_info(entry->dev, + "%s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n", + type2name[entry->type], idx, + phys_addr(entry), entry->pfn, + entry->dev_addr, entry->size, + &cln, dir2name[entry->direction], + maperr2str[entry->map_err_type]); + } + } + spin_unlock_irqrestore(&bucket->lock, flags); - if (ent_cln == cln) { - entry = ents[i]; - break; - } else if (ent_cln >= cln + CACHELINES_PER_PAGE) - break; + cond_resched(); } - spin_unlock_irqrestore(&radix_lock, flags); +} - if (!entry) - return; +/* + * Dump mappings entries on user space via debugfs + */ +static int dump_show(struct seq_file *seq, void *v) +{ + int idx; + phys_addr_t cln; - cln = to_cacheline_number(entry); - err_printk(entry->dev, entry, - "cpu touching an active dma mapped cacheline [cln=%pa]\n", - &cln); + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + list_for_each_entry(entry, &bucket->list, list) { + cln = to_cacheline_number(entry); + seq_printf(seq, + "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n", + dev_driver_string(entry->dev), + dev_name(entry->dev), + type2name[entry->type], idx, + phys_addr(entry), entry->pfn, + entry->dev_addr, entry->size, + &cln, dir2name[entry->direction], + maperr2str[entry->map_err_type]); + } + spin_unlock_irqrestore(&bucket->lock, flags); + } + return 0; } +DEFINE_SHOW_ATTRIBUTE(dump); /* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. */ -static void add_dma_entry(struct dma_debug_entry *entry) +static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) { struct hash_bucket *bucket; unsigned long flags; @@ -614,13 +597,12 @@ static void add_dma_entry(struct dma_debug_entry *entry) rc = active_cacheline_insert(entry); if (rc == -ENOMEM) { - pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); + pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + err_printk(entry->dev, entry, + "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } - - /* TODO: report -EEXIST errors here as overlapping mappings are - * not supported by the DMA API - */ } static int dma_debug_create_entries(gfp_t gfp) @@ -656,15 +638,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void) return entry; } -static void __dma_entry_alloc_check_leak(void) +/* + * This should be called outside of free_entries_lock scope to avoid potential + * deadlocks with serial consoles that use DMA. + */ +static void __dma_entry_alloc_check_leak(u32 nr_entries) { - u32 tmp = nr_total_entries % nr_prealloc_entries; + u32 tmp = nr_entries % nr_prealloc_entries; /* Shout each time we tick over some multiple of the initial pool */ if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) { pr_info("dma_debug_entry pool grown to %u (%u00%%)\n", - nr_total_entries, - (nr_total_entries / nr_prealloc_entries)); + nr_entries, + (nr_entries / nr_prealloc_entries)); } } @@ -675,8 +661,10 @@ static void __dma_entry_alloc_check_leak(void) */ static struct dma_debug_entry *dma_entry_alloc(void) { + bool alloc_check_leak = false; struct dma_debug_entry *entry; unsigned long flags; + u32 nr_entries; spin_lock_irqsave(&free_entries_lock, flags); if (num_free_entries == 0) { @@ -686,13 +674,17 @@ static struct dma_debug_entry *dma_entry_alloc(void) pr_err("debugging out of memory - disabling\n"); return NULL; } - __dma_entry_alloc_check_leak(); + alloc_check_leak = true; + nr_entries = nr_total_entries; } entry = __dma_entry_alloc(); spin_unlock_irqrestore(&free_entries_lock, flags); + if (alloc_check_leak) + __dma_entry_alloc_check_leak(nr_entries); + #ifdef CONFIG_STACKTRACE entry->stack_len = stack_trace_save(entry->stack_entries, ARRAY_SIZE(entry->stack_entries), @@ -817,34 +809,7 @@ static const struct file_operations filter_fops = { .llseek = default_llseek, }; -static int dump_show(struct seq_file *seq, void *v) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - list_for_each_entry(entry, &bucket->list, list) { - seq_printf(seq, - "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx %s %s\n", - dev_name(entry->dev), - dev_driver_string(entry->dev), - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - spin_unlock_irqrestore(&bucket->lock, flags); - } - return 0; -} -DEFINE_SHOW_ATTRIBUTE(dump); - -static void dma_debug_fs_init(void) +static int __init dma_debug_fs_init(void) { struct dentry *dentry = debugfs_create_dir("dma-api", NULL); @@ -857,7 +822,10 @@ static void dma_debug_fs_init(void) debugfs_create_u32("nr_total_entries", 0444, dentry, &nr_total_entries); debugfs_create_file("driver_filter", 0644, dentry, NULL, &filter_fops); debugfs_create_file("dump", 0444, dentry, NULL, &dump_fops); + + return 0; } +core_initcall_sync(dma_debug_fs_init); static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) { @@ -882,7 +850,7 @@ static int device_dma_allocations(struct device *dev, struct dma_debug_entry **o static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct dma_debug_entry *uninitialized_var(entry); + struct dma_debug_entry *entry; int count; if (dma_debug_disabled()) @@ -909,7 +877,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti return 0; } -void dma_debug_add_bus(struct bus_type *bus) +void dma_debug_add_bus(const struct bus_type *bus) { struct notifier_block *nb; @@ -942,8 +910,6 @@ static int dma_debug_init(void) spin_lock_init(&dma_entry_hash[i].lock); } - dma_debug_fs_init(); - nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES); for (i = 0; i < nr_pages; ++i) dma_debug_create_entries(GFP_KERNEL); @@ -977,7 +943,7 @@ static __init int dma_debug_cmdline(char *str) global_disable = true; } - return 0; + return 1; } static __init int dma_debug_entries_cmdline(char *str) @@ -986,7 +952,7 @@ static __init int dma_debug_entries_cmdline(char *str) return -EINVAL; if (!get_option(&str, &nr_prealloc_entries)) nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; + return 1; } __setup("dma_debug=", dma_debug_cmdline); @@ -1071,7 +1037,7 @@ static void check_unmap(struct dma_debug_entry *ref) /* * Drivers should use dma_mapping_error() to check the returned * addresses of dma_map_single() and dma_map_page(). - * If not, print this warning message. See Documentation/DMA-API.txt. + * If not, print this warning message. See Documentation/core-api/dma-api.rst. */ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { err_printk(ref->dev, entry, @@ -1116,20 +1082,10 @@ static void check_for_stack(struct device *dev, } } -static inline bool overlap(void *addr, unsigned long len, void *start, void *end) -{ - unsigned long a1 = (unsigned long)addr; - unsigned long b1 = a1 + len; - unsigned long a2 = (unsigned long)start; - unsigned long b2 = (unsigned long)end; - - return !(b1 <= a2 || a1 >= b2); -} - static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) { - if (overlap(addr, len, _stext, _etext) || - overlap(addr, len, __start_rodata, __end_rodata)) + if (memory_intersects(_stext, _etext, addr, len) || + memory_intersects(__start_rodata, __end_rodata, addr, len)) err_printk(dev, NULL, "device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } @@ -1251,7 +1207,8 @@ void debug_dma_map_single(struct device *dev, const void *addr, EXPORT_SYMBOL(debug_dma_map_single); void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr) + size_t size, int direction, dma_addr_t dma_addr, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1268,7 +1225,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_single; entry->pfn = page_to_pfn(page); - entry->offset = offset, + entry->offset = offset; entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1282,9 +1239,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, check_for_illegal_area(dev, addr, size); } - add_dma_entry(entry); + add_dma_entry(entry, attrs); } -EXPORT_SYMBOL(debug_dma_map_page); void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -1324,13 +1280,13 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) } EXPORT_SYMBOL(debug_dma_mapping_error); -void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, +void debug_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) { struct dma_debug_entry ref = { .type = dma_debug_single, .dev = dev, - .dev_addr = addr, + .dev_addr = dma_addr, .size = size, .direction = direction, }; @@ -1339,10 +1295,10 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, return; check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_page); void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) + int nents, int mapped_ents, int direction, + unsigned long attrs) { struct dma_debug_entry *entry; struct scatterlist *s; @@ -1351,6 +1307,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, if (unlikely(dma_debug_disabled())) return; + for_each_sg(sg, s, nents, i) { + check_for_stack(dev, sg_page(s), s->offset); + if (!PageHighMem(sg_page(s))) + check_for_illegal_area(dev, sg_virt(s), s->length); + } + for_each_sg(sg, s, mapped_ents, i) { entry = dma_entry_alloc(); if (!entry) @@ -1359,25 +1321,18 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, + entry->offset = s->offset; entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - check_sg_segment(dev, s); - add_dma_entry(entry); + add_dma_entry(entry, attrs); } } -EXPORT_SYMBOL(debug_dma_map_sg); static int get_nr_mapped_entries(struct device *dev, struct dma_debug_entry *ref) @@ -1429,10 +1384,10 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, check_unmap(&ref); } } -EXPORT_SYMBOL(debug_dma_unmap_sg); void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) + dma_addr_t dma_addr, void *virt, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1462,17 +1417,17 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, else entry->pfn = page_to_pfn(virt_to_page(virt)); - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) + void *virt, dma_addr_t dma_addr) { struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, .offset = offset_in_page(virt), - .dev_addr = addr, + .dev_addr = dma_addr, .size = size, .direction = DMA_BIDIRECTIONAL, }; @@ -1493,7 +1448,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, } void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) + int direction, dma_addr_t dma_addr, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1513,9 +1469,8 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, entry->direction = direction; entry->map_err_type = MAP_ERR_NOT_CHECKED; - add_dma_entry(entry); + add_dma_entry(entry, attrs); } -EXPORT_SYMBOL(debug_dma_map_resource); void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) @@ -1533,7 +1488,6 @@ void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_resource); void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) @@ -1552,7 +1506,6 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, check_sync(dev, &ref, true); } -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -1572,7 +1525,6 @@ void debug_dma_sync_single_for_device(struct device *dev, check_sync(dev, &ref, false); } -EXPORT_SYMBOL(debug_dma_sync_single_for_device); void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1605,7 +1557,6 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, true); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1637,7 +1588,6 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, false); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); static int __init dma_debug_driver_setup(char *str) { diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h new file mode 100644 index 000000000000..f525197d3cae --- /dev/null +++ b/kernel/dma/debug.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel <joerg.roedel@amd.com> + */ + +#ifndef _KERNEL_DMA_DEBUG_H +#define _KERNEL_DMA_DEBUG_H + +#ifdef CONFIG_DMA_API_DEBUG +extern void debug_dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction, dma_addr_t dma_addr, + unsigned long attrs); + +extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction); + +extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction, + unsigned long attrs); + +extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir); + +extern void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt, + unsigned long attrs); + +extern void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr); + +extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs); + +extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, + size_t size, int direction); + +extern void debug_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction); + +extern void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction); + +extern void debug_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nelems, int direction); + +extern void debug_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nelems, int direction); +#else /* CONFIG_DMA_API_DEBUG */ +static inline void debug_dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction, dma_addr_t dma_addr, + unsigned long attrs) +{ +} + +static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction) +{ +} + +static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction, + unsigned long attrs) +{ +} + +static inline void debug_dma_unmap_sg(struct device *dev, + struct scatterlist *sglist, + int nelems, int dir) +{ +} + +static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt, + unsigned long attrs) +{ +} + +static inline void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ +} + +static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs) +{ +} + +static inline void debug_dma_unmap_resource(struct device *dev, + dma_addr_t dma_addr, size_t size, + int direction) +{ +} + +static inline void debug_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction) +{ +} + +static inline void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, + size_t size, int direction) +{ +} + +static inline void debug_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nelems, int direction) +{ +} + +static inline void debug_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nelems, int direction) +{ +} +#endif /* CONFIG_DMA_API_DEBUG */ +#endif /* _KERNEL_DMA_DEBUG_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 67f060b86a73..4d543b1e9d57 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -1,23 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2018 Christoph Hellwig. + * Copyright (C) 2018-2020 Christoph Hellwig. * * DMA operations that map physical memory directly without using an IOMMU. */ #include <linux/memblock.h> /* for max_pfn */ #include <linux/export.h> #include <linux/mm.h> -#include <linux/dma-direct.h> +#include <linux/dma-map-ops.h> #include <linux/scatterlist.h> -#include <linux/dma-contiguous.h> -#include <linux/dma-noncoherent.h> #include <linux/pfn.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> -#include <linux/swiotlb.h> +#include <linux/slab.h> +#include "direct.h" /* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it + * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use * it for entirely different regions. In that case the arch code needs to * override the variable below for dma-direct to work properly. */ @@ -27,7 +26,7 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { if (force_dma_unencrypted(dev)) - return __phys_to_dma(dev, phys); + return phys_to_dma_unencrypted(dev, phys); return phys_to_dma(dev, phys); } @@ -45,15 +44,11 @@ u64 dma_direct_get_required_mask(struct device *dev) return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } -gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, - u64 *phys_limit) +static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit) { - u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); - - if (force_dma_unencrypted(dev)) - *phys_limit = __dma_to_phys(dev, dma_limit); - else - *phys_limit = dma_to_phys(dev, dma_limit); + u64 dma_limit = min_not_zero( + dev->coherent_dma_mask, + dev->bus_dma_limit); /* * Optimistically try the zone that the physical address mask falls @@ -63,6 +58,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. */ + *phys_limit = dma_to_phys(dev, dma_limit); if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) @@ -72,45 +68,55 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - return phys_to_dma_direct(dev, phys) + size - 1 <= - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); + dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); + + if (dma_addr == DMA_MAPPING_ERROR) + return false; + return dma_addr + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } -/* - * Decrypting memory is allowed to block, so if this device requires - * unencrypted memory it must come from atomic pools. - */ -static inline bool dma_should_alloc_from_pool(struct device *dev, gfp_t gfp, - unsigned long attrs) +static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) { - if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return false; - if (gfpflags_allow_blocking(gfp)) - return false; - if (force_dma_unencrypted(dev)) - return true; - if (!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP)) - return false; - if (dma_alloc_need_uncached(dev, attrs)) - return true; - return false; + if (!force_dma_unencrypted(dev)) + return 0; + return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size)); } -static inline bool dma_should_free_from_pool(struct device *dev, - unsigned long attrs) +static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) { - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return true; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) - return false; - if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP)) - return true; - return false; + int ret; + + if (!force_dma_unencrypted(dev)) + return 0; + ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); + if (ret) + pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); + return ret; +} + +static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) +{ + if (swiotlb_free(dev, page, size)) + return; + dma_free_contiguous(dev, page, size); +} + +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size) +{ + struct page *page = swiotlb_alloc(dev, size); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + swiotlb_free(dev, page, size); + return NULL; + } + + return page; } static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs) + gfp_t gfp, bool allow_highmem) { int node = dev_to_node(dev); struct page *page = NULL; @@ -118,17 +124,17 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, WARN_ON_ONCE(!PAGE_ALIGNED(size)); - if (attrs & DMA_ATTR_NO_WARN) - gfp |= __GFP_NOWARN; + if (is_swiotlb_for_alloc(dev)) + return dma_direct_alloc_swiotlb(dev, size); - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_limit); + gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_free_contiguous(dev, page, size); - page = NULL; + if (page) { + if (!dma_coherent_ok(dev, page_to_phys(page), size) || + (!allow_highmem && PageHighMem(page))) { + dma_free_contiguous(dev, page, size); + page = NULL; + } } again: if (!page) @@ -153,170 +159,242 @@ again: return page; } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +/* + * Check if a potentially blocking operations needs to dip into the atomic + * pools for the given device/gfp. + */ +static bool dma_direct_use_pool(struct device *dev, gfp_t gfp) +{ + return !gfpflags_allow_blocking(gfp) && !is_swiotlb_for_alloc(dev); +} + +static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct page *page; + u64 phys_limit; + void *ret; + + if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))) + return NULL; + + gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); + page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok); + if (!page) + return NULL; + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return ret; +} + +static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct page *page; + + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); + if (!page) + return NULL; + + /* remove any dirty cache lines on the kernel alias */ + if (!PageHighMem(page)) + arch_dma_prep_coherent(page, size); + + /* return the page pointer as the opaque cookie */ + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return page; +} + +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + bool remap = false, set_uncached = false; struct page *page; void *ret; - int err; size = PAGE_ALIGN(size); + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; - if (dma_should_alloc_from_pool(dev, gfp, attrs)) { - ret = dma_alloc_from_pool(dev, size, &page, gfp); - if (!ret) + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) + return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); + + if (!dev_is_dma_coherent(dev)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && + !is_swiotlb_for_alloc(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, + attrs); + + /* + * If there is a global pool, always allocate from it for + * non-coherent devices. + */ + if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL)) + return dma_alloc_from_global_coherent(dev, size, + dma_handle); + + /* + * Otherwise we require the architecture to either be able to + * mark arbitrary parts of the kernel direct mapping uncached, + * or remapped it uncached. + */ + set_uncached = IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED); + remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); + if (!set_uncached && !remap) { + pr_warn_once("coherent DMA allocations not supported on this platform.\n"); return NULL; - goto done; + } } - page = __dma_direct_alloc_pages(dev, size, gfp, attrs); + /* + * Remapping or decrypting memory may block, allocate the memory from + * the atomic pools instead if we aren't allowed block. + */ + if ((remap || force_dma_unencrypted(dev)) && + dma_direct_use_pool(dev, gfp)) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); + + /* we always manually zero the memory once we are done */ + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); if (!page) return NULL; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { - /* remove any dirty cache lines on the kernel alias */ - if (!PageHighMem(page)) - arch_dma_prep_coherent(page, size); - /* return the page pointer as the opaque cookie */ - ret = page; - goto done; + /* + * dma_alloc_contiguous can return highmem pages depending on a + * combination the cma= arguments and per-arch setup. These need to be + * remapped to return a kernel virtual address. + */ + if (PageHighMem(page)) { + remap = true; + set_uncached = false; } - if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) || - (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) { + if (remap) { + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); + + if (force_dma_unencrypted(dev)) + prot = pgprot_decrypted(prot); + /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); /* create a coherent mapping */ - ret = dma_common_contiguous_remap(page, size, - dma_pgprot(dev, PAGE_KERNEL, attrs), + ret = dma_common_contiguous_remap(page, size, prot, __builtin_return_address(0)); if (!ret) goto out_free_pages; - if (force_dma_unencrypted(dev)) { - err = set_memory_decrypted((unsigned long)ret, - 1 << get_order(size)); - if (err) - goto out_free_pages; - } - memset(ret, 0, size); - goto done; - } - - if (PageHighMem(page)) { - /* - * Depending on the cma= arguments and per-arch setup - * dma_alloc_contiguous could return highmem pages. - * Without remapping there is no way to return them here, - * so log an error and fail. - */ - dev_info(dev, "Rejecting highmem page from CMA.\n"); - goto out_free_pages; - } - - ret = page_address(page); - if (force_dma_unencrypted(dev)) { - err = set_memory_decrypted((unsigned long)ret, - 1 << get_order(size)); - if (err) - goto out_free_pages; + } else { + ret = page_address(page); + if (dma_set_decrypted(dev, ret, size)) + goto out_leak_pages; } memset(ret, 0, size); - if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - dma_alloc_need_uncached(dev, attrs)) { + if (set_uncached) { arch_dma_prep_coherent(page, size); ret = arch_dma_set_uncached(ret, size); if (IS_ERR(ret)) goto out_encrypt_pages; } -done: - if (force_dma_unencrypted(dev)) - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - else - *dma_handle = phys_to_dma(dev, page_to_phys(page)); + + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: - if (force_dma_unencrypted(dev)) { - err = set_memory_encrypted((unsigned long)page_address(page), - 1 << get_order(size)); - /* If memory cannot be re-encrypted, it must be leaked */ - if (err) - return NULL; - } + if (dma_set_encrypted(dev, page_address(page), size)) + return NULL; out_free_pages: - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); + return NULL; +out_leak_pages: return NULL; } -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) +void dma_direct_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int page_order = get_order(size); - /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ - if (dma_should_free_from_pool(dev, attrs) && - dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) - return; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ dma_free_contiguous(dev, cpu_addr, size); return; } - if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && + !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) { + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + return; + } - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) + if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + !dev_is_dma_coherent(dev)) { + if (!dma_release_from_global_coherent(page_order, cpu_addr)) + WARN_ON_ONCE(1); + return; + } + + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) + return; + + if (is_vmalloc_addr(cpu_addr)) { vunmap(cpu_addr); - else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) - arch_dma_clear_uncached(cpu_addr, size); + } else { + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) + arch_dma_clear_uncached(cpu_addr, size); + if (dma_set_encrypted(dev, cpu_addr, size)) + return; + } - dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); + __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } -void *dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); -} + struct page *page; + void *ret; -void dma_direct_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); - else - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); + if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); + + page = __dma_direct_alloc_pages(dev, size, gfp, false); + if (!page) + return NULL; + + ret = page_address(page); + if (dma_set_decrypted(dev, ret, size)) + goto out_leak_pages; + memset(ret, 0, size); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return page; +out_leak_pages: + return NULL; } -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) +void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir) { - phys_addr_t paddr = dma_to_phys(dev, addr); + void *vaddr = page_address(page); - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(dev, vaddr, size)) + return; - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(paddr, size, dir); + if (dma_set_encrypted(dev, vaddr, size)) + return; + __dma_direct_free_pages(dev, page, size); } -EXPORT_SYMBOL(dma_direct_sync_single_for_device); +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { @@ -326,36 +404,20 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, sg->length, - dir, SYNC_FOR_DEVICE); + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_device(dev, paddr, sg->length, + dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, dir); } } -EXPORT_SYMBOL(dma_direct_sync_sg_for_device); #endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ defined(CONFIG_SWIOTLB) -void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (!dev_is_dma_coherent(dev)) { - arch_sync_dma_for_cpu(paddr, size, dir); - arch_sync_dma_for_cpu_all(); - } - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); -} -EXPORT_SYMBOL(dma_direct_sync_single_for_cpu); - void dma_direct_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir) { @@ -368,79 +430,72 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, - SYNC_FOR_CPU); + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, + dir); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, sg->length); } if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu_all(); } -EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu); - -void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = dma_to_phys(dev, addr); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); - - if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); -} -EXPORT_SYMBOL(dma_direct_unmap_page); +/* + * Unmaps segments, except for ones marked as pci_p2pdma which do not + * require any further action as they contain a bus address. + */ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; int i; - for_each_sg(sgl, sg, nents, i) - dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} -EXPORT_SYMBOL(dma_direct_unmap_sg); -#endif - -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dma_addr = phys_to_dma(dev, phys); - - if (unlikely(swiotlb_force == SWIOTLB_FORCE)) - return swiotlb_map(dev, phys, size, dir, attrs); - - if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - if (swiotlb_force != SWIOTLB_NO_FORCE) - return swiotlb_map(dev, phys, size, dir, attrs); - - dev_WARN_ONCE(dev, 1, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - return DMA_MAPPING_ERROR; + for_each_sg(sgl, sg, nents, i) { + if (sg_dma_is_bus_address(sg)) + sg_dma_unmark_bus_address(sg); + else + dma_direct_unmap_page(dev, sg->dma_address, + sg_dma_len(sg), dir, attrs); } - - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - return dma_addr; } -EXPORT_SYMBOL(dma_direct_map_page); +#endif int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { - int i; + struct pci_p2pdma_map_state p2pdma_state = {}; + enum pci_p2pdma_map_type map; struct scatterlist *sg; + int i, ret; for_each_sg(sgl, sg, nents, i) { + if (is_pci_p2pdma_page(sg_page(sg))) { + map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg); + switch (map) { + case PCI_P2PDMA_MAP_BUS_ADDR: + continue; + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Any P2P mapping that traverses the PCI + * host bridge must be mapped with CPU physical + * address and not PCI bus addresses. This is + * done with dma_direct_map_page() below. + */ + break; + default: + ret = -EREMOTEIO; + goto out_unmap; + } + } + sg->dma_address = dma_direct_map_page(dev, sg_page(sg), sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) + if (sg->dma_address == DMA_MAPPING_ERROR) { + ret = -EIO; goto out_unmap; + } sg_dma_len(sg) = sg->length; } @@ -448,9 +503,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, out_unmap: dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return 0; + return ret; } -EXPORT_SYMBOL(dma_direct_map_sg); dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -467,7 +521,6 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, return dma_addr; } -EXPORT_SYMBOL(dma_direct_map_resource); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, @@ -498,9 +551,13 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); + if (force_dma_unencrypted(dev)) + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; + if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) + return ret; if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff) return -ENXIO; @@ -522,20 +579,60 @@ int dma_direct_supported(struct device *dev, u64 mask) return 1; /* - * This check needs to be against the actual bit mask value, so - * use __phys_to_dma() here so that the SME encryption mask isn't + * This check needs to be against the actual bit mask value, so use + * phys_to_dma_unencrypted() here so that the SME encryption mask isn't * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); - return mask >= __phys_to_dma(dev, min_mask); + return mask >= phys_to_dma_unencrypted(dev, min_mask); +} + +/* + * To check whether all ram resource ranges are covered by dma range map + * Returns 0 when further check is needed + * Returns 1 if there is some RAM range can't be covered by dma_range_map + */ +static int check_ram_in_range_map(unsigned long start_pfn, + unsigned long nr_pages, void *data) +{ + unsigned long end_pfn = start_pfn + nr_pages; + const struct bus_dma_region *bdr = NULL; + const struct bus_dma_region *m; + struct device *dev = data; + + while (start_pfn < end_pfn) { + for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { + unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); + + if (start_pfn >= cpu_start_pfn && + start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) { + bdr = m; + break; + } + } + if (!bdr) + return 1; + + start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); + } + + return 0; +} + +bool dma_direct_all_ram_mapped(struct device *dev) +{ + if (!dev->dma_range_map) + return true; + return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, + check_ram_in_range_map); } size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ - if (is_swiotlb_active() && - (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE)) + if (is_swiotlb_active(dev) && + (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev))) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } @@ -543,5 +640,45 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); + is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); +} + +/** + * dma_direct_set_offset - Assign scalar offset for a single DMA range. + * @dev: device pointer; needed to "own" the alloced memory. + * @cpu_start: beginning of memory region covered by this offset. + * @dma_start: beginning of DMA/PCI region covered by this offset. + * @size: size of the region. + * + * This is for the simple case of a uniform offset which cannot + * be discovered by "dma-ranges". + * + * It returns -ENOMEM if out of memory, -EINVAL if a map + * already exists, 0 otherwise. + * + * Note: any call to this from a driver is a bug. The mapping needs + * to be described by the device tree or other firmware interfaces. + */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size) +{ + struct bus_dma_region *map; + u64 offset = (u64)cpu_start - (u64)dma_start; + + if (dev->dma_range_map) { + dev_err(dev, "attempt to add DMA range to existing map\n"); + return -EINVAL; + } + + if (!offset) + return 0; + + map = kcalloc(2, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map[0].cpu_start = cpu_start; + map[0].dma_start = dma_start; + map[0].size = size; + dev->dma_range_map = map; + return 0; } diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h new file mode 100644 index 000000000000..18d346118fe8 --- /dev/null +++ b/kernel/dma/direct.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without using an IOMMU. + */ +#ifndef _KERNEL_DMA_DIRECT_H +#define _KERNEL_DMA_DIRECT_H + +#include <linux/dma-direct.h> +#include <linux/memremap.h> + +int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_can_mmap(struct device *dev); +int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); +bool dma_direct_all_ram_mapped(struct device *dev); +size_t dma_direct_max_mapping_size(struct device *dev); + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +static inline void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_device(dev, paddr, size, dir); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(paddr, size, dir); +} + +static inline void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(paddr, size, dir); + arch_sync_dma_for_cpu_all(); + } + + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); +} + +static inline dma_addr_t dma_direct_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); + + if (is_swiotlb_force_bounce(dev)) { + if (is_pci_p2pdma_page(page)) + return DMA_MAPPING_ERROR; + return swiotlb_map(dev, phys, size, dir, attrs); + } + + if (unlikely(!dma_capable(dev, dma_addr, size, true)) || + dma_kmalloc_needs_bounce(dev, size, dir)) { + if (is_pci_p2pdma_page(page)) + return DMA_MAPPING_ERROR; + if (is_swiotlb_active(dev)) + return swiotlb_map(dev, phys, size, dir, attrs); + + dev_WARN_ONCE(dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; + } + + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + return dma_addr; +} + +static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = dma_to_phys(dev, addr); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + + if (unlikely(is_swiotlb_buffer(dev, phys))) + swiotlb_tbl_unmap_single(dev, phys, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); +} +#endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index 05607642c888..b492d59ac77e 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -2,7 +2,7 @@ /* * Dummy DMA ops that always fail. */ -#include <linux/dma-mapping.h> +#include <linux/dma-map-ops.h> static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, @@ -22,7 +22,7 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, unsigned long attrs) { - return 0; + return -EINVAL; } static int dma_dummy_supported(struct device *hwdev, u64 mask) @@ -36,4 +36,3 @@ const struct dma_map_ops dma_dummy_ops = { .map_sg = dma_dummy_map_sg, .dma_supported = dma_dummy_supported, }; -EXPORT_SYMBOL(dma_dummy_ops); diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c new file mode 100644 index 000000000000..02205ab53b7e --- /dev/null +++ b/kernel/dma/map_benchmark.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 HiSilicon Limited. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/map_benchmark.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/timekeeping.h> + +struct map_benchmark_data { + struct map_benchmark bparam; + struct device *dev; + struct dentry *debugfs; + enum dma_data_direction dir; + atomic64_t sum_map_100ns; + atomic64_t sum_unmap_100ns; + atomic64_t sum_sq_map; + atomic64_t sum_sq_unmap; + atomic64_t loops; +}; + +static int map_benchmark_thread(void *data) +{ + void *buf; + dma_addr_t dma_addr; + struct map_benchmark_data *map = data; + int npages = map->bparam.granule; + u64 size = npages * PAGE_SIZE; + int ret = 0; + + buf = alloc_pages_exact(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + while (!kthread_should_stop()) { + u64 map_100ns, unmap_100ns, map_sq, unmap_sq; + ktime_t map_stime, map_etime, unmap_stime, unmap_etime; + ktime_t map_delta, unmap_delta; + + /* + * for a non-coherent device, if we don't stain them in the + * cache, this will give an underestimate of the real-world + * overhead of BIDIRECTIONAL or TO_DEVICE mappings; + * 66 means evertything goes well! 66 is lucky. + */ + if (map->dir != DMA_FROM_DEVICE) + memset(buf, 0x66, size); + + map_stime = ktime_get(); + dma_addr = dma_map_single(map->dev, buf, size, map->dir); + if (unlikely(dma_mapping_error(map->dev, dma_addr))) { + pr_err("dma_map_single failed on %s\n", + dev_name(map->dev)); + ret = -ENOMEM; + goto out; + } + map_etime = ktime_get(); + map_delta = ktime_sub(map_etime, map_stime); + + /* Pretend DMA is transmitting */ + ndelay(map->bparam.dma_trans_ns); + + unmap_stime = ktime_get(); + dma_unmap_single(map->dev, dma_addr, size, map->dir); + unmap_etime = ktime_get(); + unmap_delta = ktime_sub(unmap_etime, unmap_stime); + + /* calculate sum and sum of squares */ + + map_100ns = div64_ul(map_delta, 100); + unmap_100ns = div64_ul(unmap_delta, 100); + map_sq = map_100ns * map_100ns; + unmap_sq = unmap_100ns * unmap_100ns; + + atomic64_add(map_100ns, &map->sum_map_100ns); + atomic64_add(unmap_100ns, &map->sum_unmap_100ns); + atomic64_add(map_sq, &map->sum_sq_map); + atomic64_add(unmap_sq, &map->sum_sq_unmap); + atomic64_inc(&map->loops); + } + +out: + free_pages_exact(buf, size); + return ret; +} + +static int do_map_benchmark(struct map_benchmark_data *map) +{ + struct task_struct **tsk; + int threads = map->bparam.threads; + int node = map->bparam.node; + const cpumask_t *cpu_mask = cpumask_of_node(node); + u64 loops; + int ret = 0; + int i; + + tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + + get_device(map->dev); + + for (i = 0; i < threads; i++) { + tsk[i] = kthread_create_on_node(map_benchmark_thread, map, + map->bparam.node, "dma-map-benchmark/%d", i); + if (IS_ERR(tsk[i])) { + pr_err("create dma_map thread failed\n"); + ret = PTR_ERR(tsk[i]); + goto out; + } + + if (node != NUMA_NO_NODE) + kthread_bind_mask(tsk[i], cpu_mask); + } + + /* clear the old value in the previous benchmark */ + atomic64_set(&map->sum_map_100ns, 0); + atomic64_set(&map->sum_unmap_100ns, 0); + atomic64_set(&map->sum_sq_map, 0); + atomic64_set(&map->sum_sq_unmap, 0); + atomic64_set(&map->loops, 0); + + for (i = 0; i < threads; i++) { + get_task_struct(tsk[i]); + wake_up_process(tsk[i]); + } + + msleep_interruptible(map->bparam.seconds * 1000); + + /* wait for the completion of benchmark threads */ + for (i = 0; i < threads; i++) { + ret = kthread_stop(tsk[i]); + if (ret) + goto out; + } + + loops = atomic64_read(&map->loops); + if (likely(loops > 0)) { + u64 map_variance, unmap_variance; + u64 sum_map = atomic64_read(&map->sum_map_100ns); + u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns); + u64 sum_sq_map = atomic64_read(&map->sum_sq_map); + u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap); + + /* average latency */ + map->bparam.avg_map_100ns = div64_u64(sum_map, loops); + map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops); + + /* standard deviation of latency */ + map_variance = div64_u64(sum_sq_map, loops) - + map->bparam.avg_map_100ns * + map->bparam.avg_map_100ns; + unmap_variance = div64_u64(sum_sq_unmap, loops) - + map->bparam.avg_unmap_100ns * + map->bparam.avg_unmap_100ns; + map->bparam.map_stddev = int_sqrt64(map_variance); + map->bparam.unmap_stddev = int_sqrt64(unmap_variance); + } + +out: + for (i = 0; i < threads; i++) + put_task_struct(tsk[i]); + put_device(map->dev); + kfree(tsk); + return ret; +} + +static long map_benchmark_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct map_benchmark_data *map = file->private_data; + void __user *argp = (void __user *)arg; + u64 old_dma_mask; + int ret; + + if (copy_from_user(&map->bparam, argp, sizeof(map->bparam))) + return -EFAULT; + + switch (cmd) { + case DMA_MAP_BENCHMARK: + if (map->bparam.threads == 0 || + map->bparam.threads > DMA_MAP_MAX_THREADS) { + pr_err("invalid thread number\n"); + return -EINVAL; + } + + if (map->bparam.seconds == 0 || + map->bparam.seconds > DMA_MAP_MAX_SECONDS) { + pr_err("invalid duration seconds\n"); + return -EINVAL; + } + + if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) { + pr_err("invalid transmission delay\n"); + return -EINVAL; + } + + if (map->bparam.node != NUMA_NO_NODE && + !node_possible(map->bparam.node)) { + pr_err("invalid numa node\n"); + return -EINVAL; + } + + if (map->bparam.granule < 1 || map->bparam.granule > 1024) { + pr_err("invalid granule size\n"); + return -EINVAL; + } + + switch (map->bparam.dma_dir) { + case DMA_MAP_BIDIRECTIONAL: + map->dir = DMA_BIDIRECTIONAL; + break; + case DMA_MAP_FROM_DEVICE: + map->dir = DMA_FROM_DEVICE; + break; + case DMA_MAP_TO_DEVICE: + map->dir = DMA_TO_DEVICE; + break; + default: + pr_err("invalid DMA direction\n"); + return -EINVAL; + } + + old_dma_mask = dma_get_mask(map->dev); + + ret = dma_set_mask(map->dev, + DMA_BIT_MASK(map->bparam.dma_bits)); + if (ret) { + pr_err("failed to set dma_mask on device %s\n", + dev_name(map->dev)); + return -EINVAL; + } + + ret = do_map_benchmark(map); + + /* + * restore the original dma_mask as many devices' dma_mask are + * set by architectures, acpi, busses. When we bind them back + * to their original drivers, those drivers shouldn't see + * dma_mask changed by benchmark + */ + dma_set_mask(map->dev, old_dma_mask); + break; + default: + return -EINVAL; + } + + if (copy_to_user(argp, &map->bparam, sizeof(map->bparam))) + return -EFAULT; + + return ret; +} + +static const struct file_operations map_benchmark_fops = { + .open = simple_open, + .unlocked_ioctl = map_benchmark_ioctl, +}; + +static void map_benchmark_remove_debugfs(void *data) +{ + struct map_benchmark_data *map = (struct map_benchmark_data *)data; + + debugfs_remove(map->debugfs); +} + +static int __map_benchmark_probe(struct device *dev) +{ + struct dentry *entry; + struct map_benchmark_data *map; + int ret; + + map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map->dev = dev; + + ret = devm_add_action(dev, map_benchmark_remove_debugfs, map); + if (ret) { + pr_err("Can't add debugfs remove action\n"); + return ret; + } + + /* + * we only permit a device bound with this driver, 2nd probe + * will fail + */ + entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map, + &map_benchmark_fops); + if (IS_ERR(entry)) + return PTR_ERR(entry); + map->debugfs = entry; + + return 0; +} + +static int map_benchmark_platform_probe(struct platform_device *pdev) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct platform_driver map_benchmark_platform_driver = { + .driver = { + .name = "dma_map_benchmark", + }, + .probe = map_benchmark_platform_probe, +}; + +static int +map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return __map_benchmark_probe(&pdev->dev); +} + +static struct pci_driver map_benchmark_pci_driver = { + .name = "dma_map_benchmark", + .probe = map_benchmark_pci_probe, +}; + +static int __init map_benchmark_init(void) +{ + int ret; + + ret = pci_register_driver(&map_benchmark_pci_driver); + if (ret) + return ret; + + ret = platform_driver_register(&map_benchmark_platform_driver); + if (ret) { + pci_unregister_driver(&map_benchmark_pci_driver); + return ret; + } + + return 0; +} + +static void __exit map_benchmark_cleanup(void) +{ + platform_driver_unregister(&map_benchmark_platform_driver); + pci_unregister_driver(&map_benchmark_pci_driver); +} + +module_init(map_benchmark_init); +module_exit(map_benchmark_cleanup); + +MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>"); +MODULE_DESCRIPTION("dma_map benchmark driver"); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index a8c18c9a796f..58db8fd70471 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -7,13 +7,21 @@ */ #include <linux/memblock.h> /* for max_pfn */ #include <linux/acpi.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> +#include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/gfp.h> +#include <linux/kmsan.h> #include <linux/of_device.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include "debug.h" +#include "direct.h" + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +bool dma_default_coherent = IS_ENABLED(CONFIG_ARCH_DMA_DEFAULT_COHERENT); +#endif /* * Managed DMA API @@ -105,21 +113,277 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, } EXPORT_SYMBOL(dmam_alloc_attrs); +static bool dma_go_direct(struct device *dev, dma_addr_t mask, + const struct dma_map_ops *ops) +{ + if (likely(!ops)) + return true; +#ifdef CONFIG_DMA_OPS_BYPASS + if (dev->dma_ops_bypass) + return min_not_zero(mask, dev->bus_dma_limit) >= + dma_direct_get_required_mask(dev); +#endif + return false; +} + + /* - * Create scatter-list for the already allocated DMA buffer. + * Check if the devices uses a direct mapping for streaming DMA operations. + * This allows IOMMU drivers to set a bypass mode if the DMA mask is large + * enough. */ -int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +static inline bool dma_alloc_direct(struct device *dev, + const struct dma_map_ops *ops) +{ + return dma_go_direct(dev, dev->coherent_dma_mask, ops); +} + +static inline bool dma_map_direct(struct device *dev, + const struct dma_map_ops *ops) +{ + return dma_go_direct(dev, *dev->dma_mask, ops); +} + +dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + dma_addr_t addr; + + BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + + if (dma_map_direct(dev, ops) || + arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) + addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else + addr = ops->map_page(dev, page, offset, size, dir, attrs); + kmsan_handle_dma(page, offset, size, dir); + debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); + + return addr; +} +EXPORT_SYMBOL(dma_map_page_attrs); + +void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (dma_map_direct(dev, ops) || + arch_dma_unmap_page_direct(dev, addr + size)) + dma_direct_unmap_page(dev, addr, size, dir, attrs); + else if (ops->unmap_page) + ops->unmap_page(dev, addr, size, dir, attrs); + debug_dma_unmap_page(dev, addr, size, dir); +} +EXPORT_SYMBOL(dma_unmap_page_attrs); + +static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + int ents; + + BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return 0; + + if (dma_map_direct(dev, ops) || + arch_dma_map_sg_direct(dev, sg, nents)) + ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else + ents = ops->map_sg(dev, sg, nents, dir, attrs); + + if (ents > 0) { + kmsan_handle_dma_sg(sg, nents, dir); + debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); + } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && + ents != -EIO && ents != -EREMOTEIO)) { + return -EIO; + } + + return ents; +} + +/** + * dma_map_sg_attrs - Map the given buffer for DMA + * @dev: The device for which to perform the DMA operation + * @sg: The sg_table object describing the buffer + * @nents: Number of entries to map + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Maps a buffer described by a scatterlist passed in the sg argument with + * nents segments for the @dir DMA operation by the @dev device. + * + * Returns the number of mapped entries (which can be less than nents) + * on success. Zero is returned for any error. + * + * dma_unmap_sg_attrs() should be used to unmap the buffer with the + * original sg and original nents (not the value returned by this funciton). + */ +unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); int ret; - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs); + if (ret < 0) + return 0; return ret; } +EXPORT_SYMBOL(dma_map_sg_attrs); + +/** + * dma_map_sgtable - Map the given buffer for DMA + * @dev: The device for which to perform the DMA operation + * @sgt: The sg_table object describing the buffer + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Maps a buffer described by a scatterlist stored in the given sg_table + * object for the @dir DMA operation by the @dev device. After success, the + * ownership for the buffer is transferred to the DMA domain. One has to + * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the + * ownership of the buffer back to the CPU domain before touching the + * buffer by the CPU. + * + * Returns 0 on success or a negative error code on error. The following + * error codes are supported with the given meaning: + * + * -EINVAL An invalid argument, unaligned access or other error + * in usage. Will not succeed if retried. + * -ENOMEM Insufficient resources (like memory or IOVA space) to + * complete the mapping. Should succeed if retried later. + * -EIO Legacy error code with an unknown meaning. eg. this is + * returned if a lower level call returned + * DMA_MAPPING_ERROR. + * -EREMOTEIO The DMA device cannot access P2PDMA memory specified + * in the sg_table. This will not succeed if retried. + */ +int dma_map_sgtable(struct device *dev, struct sg_table *sgt, + enum dma_data_direction dir, unsigned long attrs) +{ + int nents; + + nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs); + if (nents < 0) + return nents; + sgt->nents = nents; + return 0; +} +EXPORT_SYMBOL_GPL(dma_map_sgtable); + +void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + debug_dma_unmap_sg(dev, sg, nents, dir); + if (dma_map_direct(dev, ops) || + arch_dma_unmap_sg_direct(dev, sg, nents)) + dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (ops->unmap_sg) + ops->unmap_sg(dev, sg, nents, dir, attrs); +} +EXPORT_SYMBOL(dma_unmap_sg_attrs); + +dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + dma_addr_t addr = DMA_MAPPING_ERROR; + + BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + + if (dma_map_direct(dev, ops)) + addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (ops->map_resource) + addr = ops->map_resource(dev, phys_addr, size, dir, attrs); + + debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs); + return addr; +} +EXPORT_SYMBOL(dma_map_resource); + +void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (!dma_map_direct(dev, ops) && ops->unmap_resource) + ops->unmap_resource(dev, addr, size, dir, attrs); + debug_dma_unmap_resource(dev, addr, size, dir); +} +EXPORT_SYMBOL(dma_unmap_resource); + +void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (dma_map_direct(dev, ops)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(dev, addr, size, dir); + debug_dma_sync_single_for_cpu(dev, addr, size, dir); +} +EXPORT_SYMBOL(dma_sync_single_for_cpu); + +void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (dma_map_direct(dev, ops)) + dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (ops->sync_single_for_device) + ops->sync_single_for_device(dev, addr, size, dir); + debug_dma_sync_single_for_device(dev, addr, size, dir); +} +EXPORT_SYMBOL(dma_sync_single_for_device); + +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (dma_map_direct(dev, ops)) + dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(dev, sg, nelems, dir); + debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); +} +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (dma_map_direct(dev, ops)) + dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (ops->sync_sg_for_device) + ops->sync_sg_for_device(dev, sg, nelems, dir); + debug_dma_sync_sg_for_device(dev, sg, nelems, dir); +} +EXPORT_SYMBOL(dma_sync_sg_for_device); /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems @@ -138,7 +402,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_is_direct(ops)) + if (dma_alloc_direct(dev, ops)) return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); if (!ops->get_sgtable) @@ -154,11 +418,7 @@ EXPORT_SYMBOL(dma_get_sgtable_attrs); */ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { - if (force_dma_unencrypted(dev)) - prot = pgprot_decrypted(prot); - if (dev_is_dma_coherent(dev) || - (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && - (attrs & DMA_ATTR_NON_CONSISTENT))) + if (dev_is_dma_coherent(dev)) return prot; #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE if (attrs & DMA_ATTR_WRITE_COMBINE) @@ -168,35 +428,6 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) } #endif /* CONFIG_MMU */ -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off >= count || user_count > count - off) - return -ENXIO; - - return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, - user_count << PAGE_SHIFT, vma->vm_page_prot); -#else - return -ENXIO; -#endif /* CONFIG_MMU */ -} - /** * dma_can_mmap - check if a given device supports dma_mmap_* * @dev: device to check @@ -208,7 +439,7 @@ bool dma_can_mmap(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_is_direct(ops)) + if (dma_alloc_direct(dev, ops)) return dma_direct_can_mmap(dev); return ops->mmap != NULL; } @@ -233,7 +464,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_is_direct(ops)) + if (dma_alloc_direct(dev, ops)) return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); if (!ops->mmap) @@ -246,7 +477,7 @@ u64 dma_get_required_mask(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_is_direct(ops)) + if (dma_alloc_direct(dev, ops)) return dma_direct_get_required_mask(dev); if (ops->get_required_mask) return ops->get_required_mask(dev); @@ -271,20 +502,28 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, WARN_ON_ONCE(!dev->coherent_dma_mask); + /* + * DMA allocations can never be turned back into a page pointer, so + * requesting compound pages doesn't make sense (and can't even be + * supported at all by various backends). + */ + if (WARN_ON_ONCE(flag & __GFP_COMP)) + return NULL; + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (dma_is_direct(ops)) + if (dma_alloc_direct(dev, ops)) cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); else if (ops->alloc) cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); else return NULL; - debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); return cpu_addr; } EXPORT_SYMBOL(dma_alloc_attrs); @@ -309,30 +548,217 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - if (dma_is_direct(ops)) + if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); else if (ops->free) ops->free(dev, size, cpu_addr, dma_handle, attrs); } EXPORT_SYMBOL(dma_free_attrs); -int dma_supported(struct device *dev, u64 mask) +static struct page *__dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (WARN_ON_ONCE(!dev->coherent_dma_mask)) + return NULL; + if (WARN_ON_ONCE(gfp & (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) + return NULL; + if (WARN_ON_ONCE(gfp & __GFP_COMP)) + return NULL; + + size = PAGE_ALIGN(size); + if (dma_alloc_direct(dev, ops)) + return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!ops->alloc_pages) + return NULL; + return ops->alloc_pages(dev, size, dma_handle, dir, gfp); +} + +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); + + if (page) + debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); + return page; +} +EXPORT_SYMBOL_GPL(dma_alloc_pages); + +static void __dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + size = PAGE_ALIGN(size); + if (dma_alloc_direct(dev, ops)) + dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (ops->free_pages) + ops->free_pages(dev, size, page, dma_handle, dir); +} + +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + debug_dma_unmap_page(dev, dma_handle, size, dir); + __dma_free_pages(dev, size, page, dma_handle, dir); +} +EXPORT_SYMBOL_GPL(dma_free_pages); + +int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma, + size_t size, struct page *page) +{ + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (vma->vm_pgoff >= count || vma_pages(vma) > count - vma->vm_pgoff) + return -ENXIO; + return remap_pfn_range(vma, vma->vm_start, + page_to_pfn(page) + vma->vm_pgoff, + vma_pages(vma) << PAGE_SHIFT, vma->vm_page_prot); +} +EXPORT_SYMBOL_GPL(dma_mmap_pages); + +static struct sg_table *alloc_single_sgt(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp) +{ + struct sg_table *sgt; + struct page *page; + + sgt = kmalloc(sizeof(*sgt), gfp); + if (!sgt) + return NULL; + if (sg_alloc_table(sgt, 1, gfp)) + goto out_free_sgt; + page = __dma_alloc_pages(dev, size, &sgt->sgl->dma_address, dir, gfp); + if (!page) + goto out_free_table; + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + sg_dma_len(sgt->sgl) = sgt->sgl->length; + return sgt; +out_free_table: + sg_free_table(sgt); +out_free_sgt: + kfree(sgt); + return NULL; +} + +struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + struct sg_table *sgt; + + if (WARN_ON_ONCE(attrs & ~DMA_ATTR_ALLOC_SINGLE_PAGES)) + return NULL; + if (WARN_ON_ONCE(gfp & __GFP_COMP)) + return NULL; + + if (ops && ops->alloc_noncontiguous) + sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); + else + sgt = alloc_single_sgt(dev, size, dir, gfp); + + if (sgt) { + sgt->nents = 1; + debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); + } + return sgt; +} +EXPORT_SYMBOL_GPL(dma_alloc_noncontiguous); + +static void free_single_sgt(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + __dma_free_pages(dev, size, sg_page(sgt->sgl), sgt->sgl->dma_address, + dir); + sg_free_table(sgt); + kfree(sgt); +} + +void dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); + if (ops && ops->free_noncontiguous) + ops->free_noncontiguous(dev, size, sgt, dir); + else + free_single_sgt(dev, size, sgt, dir); +} +EXPORT_SYMBOL_GPL(dma_free_noncontiguous); + +void *dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (ops && ops->alloc_noncontiguous) + return vmap(sgt_handle(sgt)->pages, count, VM_MAP, PAGE_KERNEL); + return page_address(sg_page(sgt->sgl)); +} +EXPORT_SYMBOL_GPL(dma_vmap_noncontiguous); + +void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops && ops->alloc_noncontiguous) + vunmap(vaddr); +} +EXPORT_SYMBOL_GPL(dma_vunmap_noncontiguous); + +int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_is_direct(ops)) + if (ops && ops->alloc_noncontiguous) { + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (vma->vm_pgoff >= count || + vma_pages(vma) > count - vma->vm_pgoff) + return -ENXIO; + return vm_map_pages(vma, sgt_handle(sgt)->pages, count); + } + return dma_mmap_pages(dev, vma, size, sg_page(sgt->sgl)); +} +EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous); + +static int dma_supported(struct device *dev, u64 mask) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + /* + * ->dma_supported sets the bypass flag, so we must always call + * into the method here unless the device is truly direct mapped. + */ + if (!ops) return dma_direct_supported(dev, mask); if (!ops->dma_supported) return 1; return ops->dma_supported(dev, mask); } -EXPORT_SYMBOL(dma_supported); -#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK -void arch_dma_set_mask(struct device *dev, u64 mask); -#else -#define arch_dma_set_mask(dev, mask) do { } while (0) -#endif +bool dma_pci_p2pdma_supported(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + /* if ops is not set, dma direct will be used which supports P2PDMA */ + if (!ops) + return true; + + /* + * Note: dma_ops_bypass is not checked here because P2PDMA should + * not be used with dma mapping ops that do not have support even + * if the specific device is bypassing them. + */ + + return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; +} +EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); int dma_set_mask(struct device *dev, u64 mask) { @@ -351,7 +777,6 @@ int dma_set_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_mask); -#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK int dma_set_coherent_mask(struct device *dev, u64 mask) { /* @@ -367,28 +792,35 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) return 0; } EXPORT_SYMBOL(dma_set_coherent_mask); -#endif -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the devices DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. + */ +bool dma_addressing_limited(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!valid_dma_direction(dir)); + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; - if (dma_is_direct(ops)) - arch_dma_cache_sync(dev, vaddr, size, dir); - else if (ops->cache_sync) - ops->cache_sync(dev, vaddr, size, dir); + if (unlikely(ops)) + return false; + return !dma_direct_all_ram_mapped(dev); } -EXPORT_SYMBOL(dma_cache_sync); +EXPORT_SYMBOL_GPL(dma_addressing_limited); size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); size_t size = SIZE_MAX; - if (dma_is_direct(ops)) + if (dma_map_direct(dev, ops)) size = dma_direct_max_mapping_size(dev); else if (ops && ops->max_mapping_size) size = ops->max_mapping_size(dev); @@ -397,11 +829,23 @@ size_t dma_max_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_max_mapping_size); +size_t dma_opt_mapping_size(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + size_t size = SIZE_MAX; + + if (ops && ops->opt_mapping_size) + size = ops->opt_mapping_size(); + + return min(dma_max_mapping_size(dev), size); +} +EXPORT_SYMBOL_GPL(dma_opt_mapping_size); + bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_is_direct(ops)) + if (dma_map_direct(dev, ops)) return dma_direct_need_sync(dev, dma_addr); return ops->sync_single_for_cpu || ops->sync_single_for_device; } diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c new file mode 100644 index 000000000000..af4a6ef48ce0 --- /dev/null +++ b/kernel/dma/ops_helpers.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for DMA ops implementations. These generally rely on the fact that + * the allocated memory contains normal pages in the direct kernel mapping. + */ +#include <linux/dma-map-ops.h> + +static struct page *dma_common_vaddr_to_page(void *cpu_addr) +{ + if (is_vmalloc_addr(cpu_addr)) + return vmalloc_to_page(cpu_addr); + return virt_to_page(cpu_addr); +} + +/* + * Create scatter-list for the already allocated DMA buffer. + */ +int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + struct page *page = dma_common_vaddr_to_page(cpu_addr); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; +} + +/* + * Create userspace mapping for the DMA-coherent memory. + */ +int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + struct page *page = dma_common_vaddr_to_page(cpu_addr); + int ret = -ENXIO; + + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off >= count || user_count > count - off) + return -ENXIO; + + return remap_pfn_range(vma, vma->vm_start, + page_to_pfn(page) + vma->vm_pgoff, + user_count << PAGE_SHIFT, vma->vm_page_prot); +#else + return -ENXIO; +#endif /* CONFIG_MMU */ +} + +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + struct page *page; + + page = dma_alloc_contiguous(dev, size, gfp); + if (!page) + page = alloc_pages_node(dev_to_node(dev), gfp, get_order(size)); + if (!page) + return NULL; + + *dma_handle = ops->map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + if (*dma_handle == DMA_MAPPING_ERROR) { + dma_free_contiguous(dev, page, size); + return NULL; + } + + memset(page_address(page), 0, size); + return page; +} + +void dma_common_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->unmap_page) + ops->unmap_page(dev, dma_handle, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + dma_free_contiguous(dev, page, size); +} diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 6bc74a2d5127..d10613eb0f63 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -3,9 +3,10 @@ * Copyright (C) 2012 ARM Ltd. * Copyright (C) 2020 Google LLC */ +#include <linux/cma.h> #include <linux/debugfs.h> +#include <linux/dma-map-ops.h> #include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> #include <linux/init.h> #include <linux/genalloc.h> #include <linux/set_memory.h> @@ -37,9 +38,6 @@ static void __init dma_atomic_pool_debugfs_init(void) struct dentry *root; root = debugfs_create_dir("dma_pools", NULL); - if (IS_ERR_OR_NULL(root)) - return; - debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma); debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32); debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel); @@ -55,20 +53,47 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) pool_size_kernel += size; } +static bool cma_in_zone(gfp_t gfp) +{ + unsigned long size; + phys_addr_t end; + struct cma *cma; + + cma = dev_get_cma_area(NULL); + if (!cma) + return false; + + size = cma_get_size(cma); + if (!size) + return false; + + /* CMA can't cross zone boundaries, see cma_activate_area() */ + end = cma_get_base(cma) + size - 1; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) + return end <= DMA_BIT_MASK(zone_dma_bits); + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + return end <= DMA_BIT_MASK(32); + return true; +} + static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, gfp_t gfp) { unsigned int order; - struct page *page; + struct page *page = NULL; void *addr; int ret = -ENOMEM; - /* Cannot allocate larger than MAX_ORDER-1 */ - order = min(get_order(pool_size), MAX_ORDER-1); + /* Cannot allocate larger than MAX_PAGE_ORDER */ + order = min(get_order(pool_size), MAX_PAGE_ORDER); do { pool_size = 1 << (PAGE_SHIFT + order); - page = alloc_pages(gfp, order); + if (cma_in_zone(gfp)) + page = dma_alloc_from_contiguous(NULL, 1 << order, + order, false); + if (!page) + page = alloc_pages(gfp, order); } while (!page && order-- > 0); if (!page) goto out; @@ -86,7 +111,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #endif /* * Memory in the atomic DMA pools must be unencrypted, the pools do not - * shrink so no re-encryption occurs in dma_direct_free_pages(). + * shrink so no re-encryption occurs in dma_direct_free(). */ ret = set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order); @@ -110,9 +135,9 @@ encrypt_mapping: remove_mapping: #ifdef CONFIG_DMA_DIRECT_REMAP dma_common_free_remap(addr, pool_size); -#endif -free_page: __maybe_unused +free_page: __free_pages(page, order); +#endif out: return ret; } @@ -165,7 +190,7 @@ static int __init dma_atomic_pool_init(void) /* * If coherent_pool was not used on the command line, default the pool - * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1. + * sizes to 128KB per 1GB of memory, min 128KB, max MAX_PAGE_ORDER. */ if (!atomic_pool_size) { unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); @@ -178,7 +203,7 @@ static int __init dma_atomic_pool_init(void) GFP_KERNEL); if (!atomic_pool_kernel) ret = -ENOMEM; - if (IS_ENABLED(CONFIG_ZONE_DMA)) { + if (has_managed_dma()) { atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, GFP_KERNEL | GFP_DMA); if (!atomic_pool_dma) @@ -196,93 +221,75 @@ static int __init dma_atomic_pool_init(void) } postcore_initcall(dma_atomic_pool_init); -static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev) +static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) { - u64 phys_mask; - gfp_t gfp; - - gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_mask); - if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA) + if (prev == NULL) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + return atomic_pool_dma32; + if (atomic_pool_dma && (gfp & GFP_DMA)) + return atomic_pool_dma; + return atomic_pool_kernel; + } + if (prev == atomic_pool_kernel) + return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma; + if (prev == atomic_pool_dma32) return atomic_pool_dma; - if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32) - return atomic_pool_dma32; - return atomic_pool_kernel; + return NULL; } -static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool) +static struct page *__dma_alloc_from_pool(struct device *dev, size_t size, + struct gen_pool *pool, void **cpu_addr, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)) { - if (bad_pool == atomic_pool_kernel) - return atomic_pool_dma32 ? : atomic_pool_dma; + unsigned long addr; + phys_addr_t phys; - if (bad_pool == atomic_pool_dma32) - return atomic_pool_dma; + addr = gen_pool_alloc(pool, size); + if (!addr) + return NULL; - return NULL; -} + phys = gen_pool_virt_to_phys(pool, addr); + if (phys_addr_ok && !phys_addr_ok(dev, phys, size)) { + gen_pool_free(pool, addr, size); + return NULL; + } -static inline struct gen_pool *dma_guess_pool(struct device *dev, - struct gen_pool *bad_pool) -{ - if (bad_pool) - return dma_get_safer_pool(bad_pool); + if (gen_pool_avail(pool) < atomic_pool_size) + schedule_work(&atomic_pool_work); - return dma_guess_pool_from_device(dev); + *cpu_addr = (void *)addr; + memset(*cpu_addr, 0, size); + return pfn_to_page(__phys_to_pfn(phys)); } -void *dma_alloc_from_pool(struct device *dev, size_t size, - struct page **ret_page, gfp_t flags) +struct page *dma_alloc_from_pool(struct device *dev, size_t size, + void **cpu_addr, gfp_t gfp, + bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t)) { struct gen_pool *pool = NULL; - unsigned long val = 0; - void *ptr = NULL; - phys_addr_t phys; - - while (1) { - pool = dma_guess_pool(dev, pool); - if (!pool) { - WARN(1, "Failed to get suitable pool for %s\n", - dev_name(dev)); - break; - } - - val = gen_pool_alloc(pool, size); - if (!val) - continue; - - phys = gen_pool_virt_to_phys(pool, val); - if (dma_coherent_ok(dev, phys, size)) - break; - - gen_pool_free(pool, val, size); - val = 0; - } - - - if (val) { - *ret_page = pfn_to_page(__phys_to_pfn(phys)); - ptr = (void *)val; - memset(ptr, 0, size); + struct page *page; - if (gen_pool_avail(pool) < atomic_pool_size) - schedule_work(&atomic_pool_work); + while ((pool = dma_guess_pool(pool, gfp))) { + page = __dma_alloc_from_pool(dev, size, pool, cpu_addr, + phys_addr_ok); + if (page) + return page; } - return ptr; + WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev)); + return NULL; } bool dma_free_from_pool(struct device *dev, void *start, size_t size) { struct gen_pool *pool = NULL; - while (1) { - pool = dma_guess_pool(dev, pool); - if (!pool) - return false; - - if (gen_pool_has_addr(pool, (unsigned long)start, size)) { - gen_pool_free(pool, (unsigned long)start, size); - return true; - } + while ((pool = dma_guess_pool(pool, 0))) { + if (!gen_pool_has_addr(pool, (unsigned long)start, size)) + continue; + gen_pool_free(pool, (unsigned long)start, size); + return true; } + + return false; } diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 78b23f089cf1..27596f3b4aef 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2014 The Linux Foundation */ -#include <linux/dma-mapping.h> +#include <linux/dma-map-ops.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -43,13 +43,13 @@ void *dma_common_contiguous_remap(struct page *page, size_t size, void *vaddr; int i; - pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL); + pages = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); if (!pages) return NULL; for (i = 0; i < count; i++) pages[i] = nth_page(page, i); vaddr = vmap(pages, count, VM_DMA_COHERENT, prot); - kfree(pages); + kvfree(pages); return vaddr; } @@ -66,6 +66,5 @@ void dma_common_free_remap(void *cpu_addr, size_t size) return; } - unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); vunmap(cpu_addr); } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index c19379fabd20..0de66f0ff43a 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -21,38 +21,37 @@ #define pr_fmt(fmt) "software IO TLB: " fmt #include <linux/cache.h> +#include <linux/cc_platform.h> +#include <linux/ctype.h> +#include <linux/debugfs.h> #include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/mm.h> +#include <linux/dma-map-ops.h> #include <linux/export.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <linux/iommu-helper.h> +#include <linux/init.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/pfn.h> +#include <linux/rculist.h> +#include <linux/scatterlist.h> +#include <linux/set_memory.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/swiotlb.h> -#include <linux/pfn.h> #include <linux/types.h> -#include <linux/ctype.h> -#include <linux/highmem.h> -#include <linux/gfp.h> -#include <linux/scatterlist.h> -#include <linux/mem_encrypt.h> -#include <linux/set_memory.h> -#ifdef CONFIG_DEBUG_FS -#include <linux/debugfs.h> +#ifdef CONFIG_DMA_RESTRICTED_POOL +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/of_reserved_mem.h> +#include <linux/slab.h> #endif -#include <asm/io.h> -#include <asm/dma.h> - -#include <linux/init.h> -#include <linux/memblock.h> -#include <linux/iommu-helper.h> - #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h> -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* @@ -62,120 +61,192 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -enum swiotlb_force swiotlb_force; +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -/* - * Used to do a quick range check in swiotlb_tbl_unmap_single and - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this - * API. +/** + * struct io_tlb_slot - IO TLB slot descriptor + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @list: The free list describing the number of free entries available + * from each index. + * @pad_slots: Number of preceding padding slots. Valid only in the first + * allocated non-padding slot. */ -phys_addr_t io_tlb_start, io_tlb_end; +struct io_tlb_slot { + phys_addr_t orig_addr; + size_t alloc_size; + unsigned short list; + unsigned short pad_slots; +}; -/* - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; +static bool swiotlb_force_bounce; +static bool swiotlb_force_disable; -/* - * The number of used IO TLB block - */ -static unsigned long io_tlb_used; +#ifdef CONFIG_SWIOTLB_DYNAMIC -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; +static void swiotlb_dyn_alloc(struct work_struct *work); -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). +static struct io_tlb_mem io_tlb_default_mem = { + .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock), + .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools), + .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc, + swiotlb_dyn_alloc), +}; + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static struct io_tlb_mem io_tlb_default_mem; + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; +static unsigned long default_nareas; + +/** + * struct io_tlb_area - IO TLB memory area descriptor + * + * This is a single area with a single lock. + * + * @used: The number of used IO TLB block. + * @index: The slot index to start searching in this area for next round. + * @lock: The lock to protect the above data structures in the map and + * unmap calls. */ -unsigned int max_segment; +struct io_tlb_area { + unsigned long used; + unsigned int index; + spinlock_t lock; +}; /* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. + * Round up number of slabs to the next power of 2. The last area is going + * be smaller than the rest if default_nslabs is not power of two. + * The number of slot in an area should be a multiple of IO_TLB_SEGSIZE, + * otherwise a segment may span two or more areas. It conflicts with free + * contiguous slots tracking: free slots are treated contiguous no matter + * whether they cross an area boundary. + * + * Return true if default_nslabs is rounded up. */ -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -static phys_addr_t *io_tlb_orig_addr; +static bool round_up_default_nslabs(void) +{ + if (!default_nareas) + return false; + + if (default_nslabs < IO_TLB_SEGSIZE * default_nareas) + default_nslabs = IO_TLB_SEGSIZE * default_nareas; + else if (is_power_of_2(default_nslabs)) + return false; + default_nslabs = roundup_pow_of_two(default_nslabs); + return true; +} -/* - * Protect the above data structures in the map and unmap calls +/** + * swiotlb_adjust_nareas() - adjust the number of areas and slots + * @nareas: Desired number of areas. Zero is treated as 1. + * + * Adjust the default number of areas in a memory pool. + * The default size of the memory pool may also change to meet minimum area + * size requirements. */ -static DEFINE_SPINLOCK(io_tlb_lock); +static void swiotlb_adjust_nareas(unsigned int nareas) +{ + if (!nareas) + nareas = 1; + else if (!is_power_of_2(nareas)) + nareas = roundup_pow_of_two(nareas); + + default_nareas = nareas; + + pr_info("area num %d.\n", nareas); + if (round_up_default_nslabs()) + pr_info("SWIOTLB bounce buffer size roundup to %luMB", + (default_nslabs << IO_TLB_SHIFT) >> 20); +} -static int late_alloc; +/** + * limit_nareas() - get the maximum number of areas for a given memory pool size + * @nareas: Desired number of areas. + * @nslots: Total number of slots in the memory pool. + * + * Limit the number of areas to the maximum possible number of areas in + * a memory pool of the given size. + * + * Return: Maximum possible number of areas. + */ +static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) +{ + if (nslots < nareas * IO_TLB_SEGSIZE) + return nslots / IO_TLB_SEGSIZE; + return nareas; +} static int __init setup_io_tlb_npages(char *str) { if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + default_nslabs = + ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); } if (*str == ',') ++str; - if (!strcmp(str, "force")) { - swiotlb_force = SWIOTLB_FORCE; - } else if (!strcmp(str, "noforce")) { - swiotlb_force = SWIOTLB_NO_FORCE; - io_tlb_nslabs = 1; - } + if (isdigit(*str)) + swiotlb_adjust_nareas(simple_strtoul(str, &str, 0)); + if (*str == ',') + ++str; + if (!strcmp(str, "force")) + swiotlb_force_bounce = true; + else if (!strcmp(str, "noforce")) + swiotlb_force_disable = true; return 0; } early_param("swiotlb", setup_io_tlb_npages); -static bool no_iotlb_memory; - -unsigned long swiotlb_nr_tbl(void) -{ - return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs; -} -EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); - -unsigned int swiotlb_max_segment(void) +unsigned long swiotlb_size_or_default(void) { - return unlikely(no_iotlb_memory) ? 0 : max_segment; + return default_nslabs << IO_TLB_SHIFT; } -EXPORT_SYMBOL_GPL(swiotlb_max_segment); -void swiotlb_set_max_segment(unsigned int val) +void __init swiotlb_adjust_size(unsigned long size) { - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - -/* default to 64MB */ -#define IO_TLB_DEFAULT_SIZE (64UL<<20) -unsigned long swiotlb_size_or_default(void) -{ - unsigned long size; - - size = io_tlb_nslabs << IO_TLB_SHIFT; + /* + * If swiotlb parameter has not been specified, give a chance to + * architectures such as those supporting memory encryption to + * adjust/expand SWIOTLB size for their use. + */ + if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) + return; - return size ? size : (IO_TLB_DEFAULT_SIZE); + size = ALIGN(size, IO_TLB_SIZE); + default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); + if (round_up_default_nslabs()) + size = default_nslabs << IO_TLB_SHIFT; + pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); } void swiotlb_print_info(void) { - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; - if (no_iotlb_memory) { + if (!mem->nslabs) { pr_warn("No low mem\n"); return; } - pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n", - (unsigned long long)io_tlb_start, - (unsigned long long)io_tlb_end, - bytes >> 20); + pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end, + (mem->nslabs << IO_TLB_SHIFT) >> 20); +} + +static inline unsigned long io_tlb_offset(unsigned long val) +{ + return val & (IO_TLB_SEGSIZE - 1); +} + +static inline unsigned long nr_slots(u64 val) +{ + return DIV_ROUND_UP(val, IO_TLB_SIZE); } /* @@ -186,87 +257,169 @@ void swiotlb_print_info(void) */ void __init swiotlb_update_mem_attributes(void) { - void *vaddr; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long bytes; - if (no_iotlb_memory || late_alloc) + if (!mem->nslabs || mem->late_alloc) return; + bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); + set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); +} + +static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, + unsigned long nslabs, bool late_alloc, unsigned int nareas) +{ + void *vaddr = phys_to_virt(start); + unsigned long bytes = nslabs << IO_TLB_SHIFT, i; + + mem->nslabs = nslabs; + mem->start = start; + mem->end = mem->start + bytes; + mem->late_alloc = late_alloc; + mem->nareas = nareas; + mem->area_nslabs = nslabs / mem->nareas; + + for (i = 0; i < mem->nareas; i++) { + spin_lock_init(&mem->areas[i].lock); + mem->areas[i].index = 0; + mem->areas[i].used = 0; + } + + for (i = 0; i < mem->nslabs; i++) { + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; + } - vaddr = phys_to_virt(io_tlb_start); - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); + mem->vaddr = vaddr; + return; } -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +/** + * add_mem_pool() - add a memory pool to the allocator + * @mem: Software IO TLB allocator. + * @pool: Memory pool to be added. + */ +static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool) { - unsigned long i, bytes; - size_t alloc_size; - - bytes = nslabs << IO_TLB_SHIFT; +#ifdef CONFIG_SWIOTLB_DYNAMIC + spin_lock(&mem->lock); + list_add_rcu(&pool->node, &mem->pools); + mem->nslabs += pool->nslabs; + spin_unlock(&mem->lock); +#else + mem->nslabs = pool->nslabs; +#endif +} - io_tlb_nslabs = nslabs; - io_tlb_start = __pa(tlb); - io_tlb_end = io_tlb_start + bytes; +static void __init *swiotlb_memblock_alloc(unsigned long nslabs, + unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)) +{ + size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); + void *tlb; /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. + * By default allocate the bounce buffer memory from low memory, but + * allow to pick a location everywhere for hypervisors with guest + * memory encryption. */ - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int)); - io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE); - if (!io_tlb_list) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); - - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)); - io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE); - if (!io_tlb_orig_addr) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); + if (flags & SWIOTLB_ANY) + tlb = memblock_alloc(bytes, PAGE_SIZE); + else + tlb = memblock_alloc_low(bytes, PAGE_SIZE); - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + if (!tlb) { + pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", + __func__, bytes); + return NULL; } - io_tlb_index = 0; - if (verbose) - swiotlb_print_info(); + if (remap && remap(tlb, nslabs) < 0) { + memblock_free(tlb, PAGE_ALIGN(bytes)); + pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); + return NULL; + } - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - return 0; + return tlb; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -void __init -swiotlb_init(int verbose) +void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)) { - size_t default_size = IO_TLB_DEFAULT_SIZE; - unsigned char *vstart; - unsigned long bytes; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; + unsigned long nslabs; + unsigned int nareas; + size_t alloc_size; + void *tlb; + + if (!addressing_limit && !swiotlb_force_bounce) + return; + if (swiotlb_force_disable) + return; + + io_tlb_default_mem.force_bounce = + swiotlb_force_bounce || (flags & SWIOTLB_FORCE); + +#ifdef CONFIG_SWIOTLB_DYNAMIC + if (!remap) + io_tlb_default_mem.can_grow = true; + if (flags & SWIOTLB_ANY) + io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); + else + io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT; +#endif + + if (!default_nareas) + swiotlb_adjust_nareas(num_possible_cpus()); + + nslabs = default_nslabs; + nareas = limit_nareas(default_nareas, nslabs); + while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { + if (nslabs <= IO_TLB_MIN_SLABS) + return; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); + } - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + if (default_nslabs != nslabs) { + pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs", + default_nslabs, nslabs); + default_nslabs = nslabs; } - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); + mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); + if (!mem->slots) { + pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", + __func__, alloc_size, PAGE_SIZE); + return; + } - /* Get IO TLB memory from the low pages */ - vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), + nareas), SMP_CACHE_BYTES); + if (!mem->areas) { + pr_warn("%s: Failed to allocate mem->areas.\n", __func__); return; + } - if (io_tlb_start) - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate buffer"); - no_iotlb_memory = true; + swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); + add_mem_pool(&io_tlb_default_mem, mem); + + if (flags & SWIOTLB_VERBOSE) + swiotlb_print_info(); +} + +void __init swiotlb_init(bool addressing_limit, unsigned int flags) +{ + swiotlb_init_remap(addressing_limit, flags, NULL); } /* @@ -274,148 +427,473 @@ swiotlb_init(int verbose) * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching. */ -int -swiotlb_late_init_with_default_size(size_t default_size) +int swiotlb_init_late(size_t size, gfp_t gfp_mask, + int (*remap)(void *tlb, unsigned long nslabs)) { - unsigned long bytes, req_nslabs = io_tlb_nslabs; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; + unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); + unsigned int nareas; unsigned char *vstart = NULL; - unsigned int order; + unsigned int order, area_order; + bool retried = false; int rc = 0; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } + if (io_tlb_default_mem.nslabs) + return 0; - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); - io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + if (swiotlb_force_disable) + return 0; + + io_tlb_default_mem.force_bounce = swiotlb_force_bounce; + +#ifdef CONFIG_SWIOTLB_DYNAMIC + if (!remap) + io_tlb_default_mem.can_grow = true; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) + io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits); + else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) + io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32); + else + io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); +#endif + + if (!default_nareas) + swiotlb_adjust_nareas(num_possible_cpus()); + +retry: + order = get_order(nslabs << IO_TLB_SHIFT); + nslabs = SLABS_PER_PAGE << order; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, order); if (vstart) break; order--; + nslabs = SLABS_PER_PAGE << order; + retried = true; } - if (!vstart) { - io_tlb_nslabs = req_nslabs; + if (!vstart) return -ENOMEM; + + if (remap) + rc = remap(vstart, nslabs); + if (rc) { + free_pages((unsigned long)vstart, order); + + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + if (nslabs < IO_TLB_MIN_SLABS) + return rc; + retried = true; + goto retry; } - if (order != get_order(bytes)) { + + if (retried) { pr_warn("only able to allocate %ld MB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; } - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - return rc; + nareas = limit_nareas(default_nareas, nslabs); + area_order = get_order(array_size(sizeof(*mem->areas), nareas)); + mem->areas = (struct io_tlb_area *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); + if (!mem->areas) + goto error_area; + + mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(array_size(sizeof(*mem->slots), nslabs))); + if (!mem->slots) + goto error_slots; + + set_memory_decrypted((unsigned long)vstart, + (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); + swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, + nareas); + add_mem_pool(&io_tlb_default_mem, mem); + + swiotlb_print_info(); + return 0; + +error_slots: + free_pages((unsigned long)mem->areas, area_order); +error_area: + free_pages((unsigned long)vstart, order); + return -ENOMEM; } -static void swiotlb_cleanup(void) +void __init swiotlb_exit(void) { - io_tlb_end = 0; - io_tlb_start = 0; - io_tlb_nslabs = 0; - max_segment = 0; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; + unsigned long tbl_vaddr; + size_t tbl_size, slots_size; + unsigned int area_order; + + if (swiotlb_force_bounce) + return; + + if (!mem->nslabs) + return; + + pr_info("tearing down default memory pool\n"); + tbl_vaddr = (unsigned long)phys_to_virt(mem->start); + tbl_size = PAGE_ALIGN(mem->end - mem->start); + slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs)); + + set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); + if (mem->late_alloc) { + area_order = get_order(array_size(sizeof(*mem->areas), + mem->nareas)); + free_pages((unsigned long)mem->areas, area_order); + free_pages(tbl_vaddr, get_order(tbl_size)); + free_pages((unsigned long)mem->slots, get_order(slots_size)); + } else { + memblock_free_late(__pa(mem->areas), + array_size(sizeof(*mem->areas), mem->nareas)); + memblock_free_late(mem->start, tbl_size); + memblock_free_late(__pa(mem->slots), slots_size); + } + + memset(mem, 0, sizeof(*mem)); } -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * alloc_dma_pages() - allocate pages to be used for DMA + * @gfp: GFP flags for the allocation. + * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. + * + * Allocate pages from the buddy allocator. If successful, make the allocated + * pages decrypted that they can be used for DMA. + * + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. + */ +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { - unsigned long i, bytes; + unsigned int order = get_order(bytes); + struct page *page; + phys_addr_t paddr; + void *vaddr; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; - bytes = nslabs << IO_TLB_SHIFT; + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } + + vaddr = phys_to_virt(paddr); + if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) + goto error; + return page; - io_tlb_nslabs = nslabs; - io_tlb_start = virt_to_phys(tlb); - io_tlb_end = io_tlb_start + bytes; +error: + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); + return NULL; +} - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); +/** + * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer + * @dev: Device for which a memory pool is allocated. + * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. + * @gfp: GFP flags for the allocation. + * + * Return: Allocated pages, or %NULL on allocation failure. + */ +static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, + u64 phys_limit, gfp_t gfp) +{ + struct page *page; /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. + * Allocate from the atomic pools if memory is encrypted and + * the allocation is atomic, because decrypting may block. */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup3; + if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { + void *vaddr; - io_tlb_orig_addr = (phys_addr_t *) - __get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * - sizeof(phys_addr_t))); - if (!io_tlb_orig_addr) - goto cleanup4; + if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) + return NULL; - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, + dma_coherent_ok); } - io_tlb_index = 0; - swiotlb_print_info(); + gfp &= ~GFP_ZONEMASK; + if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + gfp |= __GFP_DMA; + else if (phys_limit <= DMA_BIT_MASK(32)) + gfp |= __GFP_DMA32; + + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + phys_limit < DMA_BIT_MASK(64) && + !(gfp & (__GFP_DMA32 | __GFP_DMA))) + gfp |= __GFP_DMA32; + else if (IS_ENABLED(CONFIG_ZONE_DMA) && + !(gfp & __GFP_DMA)) + gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; + else + return NULL; + } - late_alloc = 1; + return page; +} - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); +/** + * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer + * @vaddr: Virtual address of the buffer. + * @bytes: Size of the buffer. + */ +static void swiotlb_free_tlb(void *vaddr, size_t bytes) +{ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(NULL, vaddr, bytes)) + return; - return 0; + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(virt_to_page(vaddr), get_order(bytes)); +} -cleanup4: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; -cleanup3: - swiotlb_cleanup(); - return -ENOMEM; +/** + * swiotlb_alloc_pool() - allocate a new IO TLB memory pool + * @dev: Device for which a memory pool is allocated. + * @minslabs: Minimum number of slabs. + * @nslabs: Desired (maximum) number of slabs. + * @nareas: Number of areas. + * @phys_limit: Maximum DMA buffer physical address. + * @gfp: GFP flags for the allocations. + * + * Allocate and initialize a new IO TLB memory pool. The actual number of + * slabs may be reduced if allocation of @nslabs fails. If even + * @minslabs cannot be allocated, this function fails. + * + * Return: New memory pool, or %NULL on allocation failure. + */ +static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, + unsigned long minslabs, unsigned long nslabs, + unsigned int nareas, u64 phys_limit, gfp_t gfp) +{ + struct io_tlb_pool *pool; + unsigned int slot_order; + struct page *tlb; + size_t pool_size; + size_t tlb_size; + + if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { + nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; + nareas = limit_nareas(nareas, nslabs); + } + + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); + pool = kzalloc(pool_size, gfp); + if (!pool) + goto error; + pool->areas = (void *)pool + sizeof(*pool); + + tlb_size = nslabs << IO_TLB_SHIFT; + while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) { + if (nslabs <= minslabs) + goto error_tlb; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); + tlb_size = nslabs << IO_TLB_SHIFT; + } + + slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); + pool->slots = (struct io_tlb_slot *) + __get_free_pages(gfp, slot_order); + if (!pool->slots) + goto error_slots; + + swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas); + return pool; + +error_slots: + swiotlb_free_tlb(page_address(tlb), tlb_size); +error_tlb: + kfree(pool); +error: + return NULL; } -void __init swiotlb_exit(void) +/** + * swiotlb_dyn_alloc() - dynamic memory pool allocation worker + * @work: Pointer to dyn_alloc in struct io_tlb_mem. + */ +static void swiotlb_dyn_alloc(struct work_struct *work) { - if (!io_tlb_orig_addr) + struct io_tlb_mem *mem = + container_of(work, struct io_tlb_mem, dyn_alloc); + struct io_tlb_pool *pool; + + pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs, + default_nareas, mem->phys_limit, GFP_KERNEL); + if (!pool) { + pr_warn_ratelimited("Failed to allocate new pool"); return; + } - if (late_alloc) { - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - free_pages((unsigned long)phys_to_virt(io_tlb_start), - get_order(io_tlb_nslabs << IO_TLB_SHIFT)); - } else { - memblock_free_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - memblock_free_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - memblock_free_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + add_mem_pool(mem, pool); +} + +/** + * swiotlb_dyn_free() - RCU callback to free a memory pool + * @rcu: RCU head in the corresponding struct io_tlb_pool. + */ +static void swiotlb_dyn_free(struct rcu_head *rcu) +{ + struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); + size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); + size_t tlb_size = pool->end - pool->start; + + free_pages((unsigned long)pool->slots, get_order(slots_size)); + swiotlb_free_tlb(pool->vaddr, tlb_size); + kfree(pool); +} + +/** + * swiotlb_find_pool() - find the IO TLB pool for a physical address + * @dev: Device which has mapped the DMA buffer. + * @paddr: Physical address within the DMA buffer. + * + * Find the IO TLB memory pool descriptor which contains the given physical + * address, if any. + * + * Return: Memory pool which contains @paddr, or %NULL if none. + */ +struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + if (paddr >= pool->start && paddr < pool->end) + goto out; } - swiotlb_cleanup(); + + list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { + if (paddr >= pool->start && paddr < pool->end) + goto out; + } + pool = NULL; +out: + rcu_read_unlock(); + return pool; +} + +/** + * swiotlb_del_pool() - remove an IO TLB pool from a device + * @dev: Owning device. + * @pool: Memory pool to be removed. + */ +static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); + list_del_rcu(&pool->node); + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + + call_rcu(&pool->rcu, swiotlb_dyn_free); +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +/** + * swiotlb_dev_init() - initialize swiotlb fields in &struct device + * @dev: Device to be initialized. + */ +void swiotlb_dev_init(struct device *dev) +{ + dev->dma_io_tlb_mem = &io_tlb_default_mem; +#ifdef CONFIG_SWIOTLB_DYNAMIC + INIT_LIST_HEAD(&dev->dma_io_tlb_pools); + spin_lock_init(&dev->dma_io_tlb_lock); + dev->dma_uses_io_tlb = false; +#endif +} + +/** + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. + * @dev: Owning device. + * @align_mask: Allocation alignment mask. + * @addr: DMA address. + * + * Return the minimum offset from the start of an IO TLB allocation which is + * required for a given buffer address and allocation alignment to keep the + * device happy. + * + * First, the address bits covered by min_align_mask must be identical in the + * original address and the bounce buffer address. High bits are preserved by + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra + * padding bytes before the bounce buffer. + * + * Second, @align_mask specifies which bits of the first allocated slot must + * be zero. This may require allocating additional padding slots, and then the + * offset (in bytes) from the first such padding slot is returned. + */ +static unsigned int swiotlb_align_offset(struct device *dev, + unsigned int align_mask, u64 addr) +{ + return addr & dma_get_min_align_mask(dev) & + (align_mask | (IO_TLB_SIZE - 1)); } /* * Bounce: copy the swiotlb buffer from or back to the original dma location */ -static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, + enum dma_data_direction dir) { + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); + int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = mem->slots[index].orig_addr; + size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = phys_to_virt(tlb_addr); + unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; + int tlb_offset; + + if (orig_addr == INVALID_PHYS_ADDR) + return; + + /* + * It's valid for tlb_offset to be negative. This can happen when the + * "offset" returned by swiotlb_align_offset() is non-zero, and the + * tlb_addr is pointing within the first "offset" bytes of the second + * or subsequent slots of the allocated swiotlb area. While it's not + * valid for tlb_addr to be pointing within the first "offset" bytes + * of the first slot, there's no way to check for such an error since + * this function can't distinguish the first slot from the second and + * subsequent slots. + */ + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(dev, 0, orig_addr); + + orig_addr += tlb_offset; + alloc_size -= tlb_offset; + + if (size > alloc_size) { + dev_WARN_ONCE(dev, 1, + "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", + alloc_size, size); + size = alloc_size; + } if (PageHighMem(pfn_to_page(pfn))) { - /* The buffer does not have a mapping. Map it in and copy */ unsigned int offset = orig_addr & ~PAGE_MASK; - char *buffer; + struct page *page; unsigned int sz = 0; unsigned long flags; @@ -423,12 +901,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn)); + page = pfn_to_page(pfn); if (dir == DMA_TO_DEVICE) - memcpy(vaddr, buffer + offset, sz); + memcpy_from_page(vaddr, page, offset, sz); else - memcpy(buffer + offset, vaddr, sz); - kunmap_atomic(buffer); + memcpy_to_page(page, offset, vaddr, sz); local_irq_restore(flags); size -= sz; @@ -443,157 +920,504 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, } } -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs) +static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) { - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - unsigned long tmp_io_tlb_used; + return start + (idx << IO_TLB_SHIFT); +} - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); +/* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. + */ +static inline unsigned long get_max_slots(unsigned long boundary_mask) +{ + return (boundary_mask >> IO_TLB_SHIFT) + 1; +} - if (mem_encrypt_active()) - pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); +static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index) +{ + if (index >= mem->area_nslabs) + return 0; + return index; +} - if (mapping_size > alloc_size) { - dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } +/* + * Track the total used slots with a global atomic value in order to have + * correct information to determine the high water mark. The mem_used() + * function gives imprecise results because there's no locking across + * multiple areas. + */ +#ifdef CONFIG_DEBUG_FS +static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) +{ + unsigned long old_hiwater, new_used; + + new_used = atomic_long_add_return(nslots, &mem->total_used); + old_hiwater = atomic_long_read(&mem->used_hiwater); + do { + if (new_used <= old_hiwater) + break; + } while (!atomic_long_try_cmpxchg(&mem->used_hiwater, + &old_hiwater, new_used)); +} - mask = dma_get_seg_boundary(hwdev); +static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_sub(nslots, &mem->total_used); +} - tbl_dma_addr &= mask; +#else /* !CONFIG_DEBUG_FS */ +static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +#endif /* CONFIG_DEBUG_FS */ - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; +#ifdef CONFIG_SWIOTLB_DYNAMIC +#ifdef CONFIG_DEBUG_FS +static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_add(nslots, &mem->transient_nslabs); +} + +static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_sub(nslots, &mem->transient_nslabs); +} + +#else /* !CONFIG_DEBUG_FS */ +static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +/** + * swiotlb_search_pool_area() - search one memory area in one pool + * @dev: Device which maps the buffer. + * @pool: Memory pool to be searched. + * @area_index: Index of the IO TLB memory area to be searched. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Find a suitable sequence of IO TLB entries for the request and allocate + * a buffer from the given IO TLB memory area. + * This function takes care of locking. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool, + int area_index, phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask) +{ + struct io_tlb_area *area = pool->areas + area_index; + unsigned long boundary_mask = dma_get_seg_boundary(dev); + dma_addr_t tbl_dma_addr = + phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; + unsigned long max_slots = get_max_slots(boundary_mask); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); + unsigned int nslots = nr_slots(alloc_size), stride; + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); + unsigned int index, slots_checked, count = 0, i; + unsigned long flags; + unsigned int slot_base; + unsigned int slot_index; + + BUG_ON(!nslots); + BUG_ON(area_index >= pool->nareas); /* - * Carefully handle integer overflow which can occur when mask == ~0UL. + * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be + * page-aligned in the absence of any other alignment requirements. + * 'alloc_align_mask' was later introduced to specify the alignment + * explicitly, however this is passed as zero for streaming mappings + * and so we preserve the old behaviour there in case any drivers are + * relying on it. */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) + alloc_align_mask = PAGE_SIZE - 1; /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. */ - nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (alloc_size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. + * For mappings with an alignment requirement don't bother looping to + * unaligned slots once we found an aligned one. */ - spin_lock_irqsave(&io_tlb_lock, flags); + stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); - if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) + spin_lock_irqsave(&area->lock, flags); + if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; + slot_base = area_index * pool->area_nslabs; + index = area->index; - do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } + for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { + phys_addr_t tlb_addr; - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot - * and mark the entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); + slot_index = slot_base + index; + tlb_addr = slot_addr(tbl_dma_addr, slot_index); - goto found; + if ((tlb_addr & alloc_align_mask) || + (orig_addr && (tlb_addr & iotlb_align_mask) != + (orig_addr & iotlb_align_mask))) { + index = wrap_area_index(pool, index + 1); + slots_checked++; + continue; + } + + if (!iommu_is_span_boundary(slot_index, nslots, + nr_slots(tbl_dma_addr), + max_slots)) { + if (pool->slots[slot_index].list >= nslots) + goto found; } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); + index = wrap_area_index(pool, index + stride); + slots_checked += stride; + } not_found: - tmp_io_tlb_used = io_tlb_used; + spin_unlock_irqrestore(&area->lock, flags); + return -1; - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, io_tlb_nslabs, tmp_io_tlb_used); - return (phys_addr_t)DMA_MAPPING_ERROR; found: - io_tlb_used += nslots; - spin_unlock_irqrestore(&io_tlb_lock, flags); + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot onwards + * and set the list of free entries to '0' indicating unavailable. + */ + for (i = slot_index; i < slot_index + nslots; i++) { + pool->slots[i].list = 0; + pool->slots[i].alloc_size = alloc_size - (offset + + ((i - slot_index) << IO_TLB_SHIFT)); + } + for (i = slot_index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && + pool->slots[i].list; i--) + pool->slots[i].list = ++count; /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. + * Update the indices to avoid searching in the next round. */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); + area->index = wrap_area_index(pool, index + nslots); + area->used += nslots; + spin_unlock_irqrestore(&area->lock, flags); - return tlb_addr; + inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots); + return slot_index; } -/* - * tlb_addr is the physical address of the bounce buffer to unmap. +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * swiotlb_search_area() - search one memory area in all pools + * @dev: Device which maps the buffer. + * @start_cpu: Start CPU number. + * @cpu_offset: Offset from @start_cpu. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. + * + * Search one memory area in all pools for a sequence of slots that match the + * allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int swiotlb_search_area(struct device *dev, int start_cpu, + int cpu_offset, phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask, struct io_tlb_pool **retpool) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + int area_index; + int index = -1; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + if (cpu_offset >= pool->nareas) + continue; + area_index = (start_cpu + cpu_offset) & (pool->nareas - 1); + index = swiotlb_search_pool_area(dev, pool, area_index, + orig_addr, alloc_size, + alloc_align_mask); + if (index >= 0) { + *retpool = pool; + break; + } + } + rcu_read_unlock(); + return index; +} + +/** + * swiotlb_find_slots() - search for slots in the whole swiotlb + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. + * + * Search through the whole software IO TLB to find a sequence of slots that + * match the allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t mapping_size, size_t alloc_size, - enum dma_data_direction dir, unsigned long attrs) +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask, + struct io_tlb_pool **retpool) { + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + unsigned long nslabs; unsigned long flags; - int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; + u64 phys_limit; + int cpu, i; + int index; + + if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) + return -1; + + cpu = raw_smp_processor_id(); + for (i = 0; i < default_nareas; ++i) { + index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, + alloc_align_mask, &pool); + if (index >= 0) + goto found; + } + + if (!mem->can_grow) + return -1; + + schedule_work(&mem->dyn_alloc); + + nslabs = nr_slots(alloc_size); + phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); + pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, + GFP_NOWAIT | __GFP_NOWARN); + if (!pool) + return -1; + + index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, + alloc_size, alloc_align_mask); + if (index < 0) { + swiotlb_dyn_free(&pool->rcu); + return -1; + } + + pool->transient = true; + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); + list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + inc_transient_used(mem, pool->nslabs); + +found: + WRITE_ONCE(dev->dma_uses_io_tlb, true); /* - * First, sync the memory before unmapping the entry + * The general barrier orders reads and writes against a presumed store + * of the SWIOTLB buffer address by a device driver (to a driver private + * data structure). It serves two purposes. + * + * First, the store to dev->dma_uses_io_tlb must be ordered before the + * presumed store. This guarantees that the returned buffer address + * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. + * + * Second, the load from mem->pools must be ordered before the same + * presumed store. This guarantees that the returned buffer address + * cannot be observed by another CPU before an update of the RCU list + * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy + * atomicity). + * + * See also the comment in is_swiotlb_buffer(). + */ + smp_mb(); + + *retpool = pool; + return index; +} + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask, + struct io_tlb_pool **retpool) +{ + struct io_tlb_pool *pool; + int start, i; + int index; + + *retpool = pool = &dev->dma_io_tlb_mem->defpool; + i = start = raw_smp_processor_id() & (pool->nareas - 1); + do { + index = swiotlb_search_pool_area(dev, pool, i, orig_addr, + alloc_size, alloc_align_mask); + if (index >= 0) + return index; + if (++i >= pool->nareas) + i = 0; + } while (i != start); + return -1; +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +#ifdef CONFIG_DEBUG_FS + +/** + * mem_used() - get number of used slots in an allocator + * @mem: Software IO TLB allocator. + * + * The result is accurate in this version of the function, because an atomic + * counter is available if CONFIG_DEBUG_FS is set. + * + * Return: Number of used slots. + */ +static unsigned long mem_used(struct io_tlb_mem *mem) +{ + return atomic_long_read(&mem->total_used); +} + +#else /* !CONFIG_DEBUG_FS */ + +/** + * mem_pool_used() - get number of used slots in a memory pool + * @pool: Software IO TLB memory pool. + * + * The result is not accurate, see mem_used(). + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_pool_used(struct io_tlb_pool *pool) +{ + int i; + unsigned long used = 0; + + for (i = 0; i < pool->nareas; i++) + used += pool->areas[i].used; + return used; +} + +/** + * mem_used() - get number of used slots in an allocator + * @mem: Software IO TLB allocator. + * + * The result is not accurate, because there is no locking of individual + * areas. + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_used(struct io_tlb_mem *mem) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct io_tlb_pool *pool; + unsigned long used = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) + used += mem_pool_used(pool); + rcu_read_unlock(); + + return used; +#else + return mem_pool_used(&mem->defpool); +#endif +} + +#endif /* CONFIG_DEBUG_FS */ + +phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, + size_t mapping_size, size_t alloc_size, + unsigned int alloc_align_mask, enum dma_data_direction dir, + unsigned long attrs) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + unsigned int offset; + struct io_tlb_pool *pool; + unsigned int i; + int index; + phys_addr_t tlb_addr; + unsigned short pad_slots; + + if (!mem || !mem->nslabs) { + dev_warn_ratelimited(dev, + "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); + + if (mapping_size > alloc_size) { + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", + mapping_size, alloc_size); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); + index = swiotlb_find_slots(dev, orig_addr, + alloc_size + offset, alloc_align_mask, &pool); + if (index == -1) { + if (!(attrs & DMA_ATTR_NO_WARN)) + dev_warn_ratelimited(dev, + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", + alloc_size, mem->nslabs, mem_used(mem)); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + pad_slots = offset >> IO_TLB_SHIFT; + offset &= (IO_TLB_SIZE - 1); + index += pad_slots; + pool->slots[index].pad_slots = pad_slots; + for (i = 0; i < nr_slots(alloc_size + offset); i++) + pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); + tlb_addr = slot_addr(pool->start, index) + offset; + /* + * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy + * the original buffer to the TLB buffer before initiating DMA in order + * to preserve the original's data if the device does a partial write, + * i.e. if the device doesn't overwrite the entire buffer. Preserving + * the original data, even if it's garbage, is necessary to match + * hardware behavior. Use of swiotlb is supposed to be transparent, + * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. */ - if (orig_addr != INVALID_PHYS_ADDR && - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); + return tlb_addr; +} + +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) +{ + struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); + unsigned long flags; + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); + int index, nslots, aindex; + struct io_tlb_area *area; + int count, i; + + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; + index -= mem->slots[index].pad_slots; + nslots = nr_slots(mem->slots[index].alloc_size + offset); + aindex = index / mem->area_nslabs; + area = &mem->areas[aindex]; /* * Return the buffer to the free list by setting the corresponding @@ -601,59 +1425,110 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; + BUG_ON(aindex >= mem->nareas); + + spin_lock_irqsave(&area->lock, flags); + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) + count = mem->slots[index + nslots].list; + else + count = 0; - io_tlb_used -= nslots; + /* + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + mem->slots[i].list = ++count; + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } - spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Step 2: merge the returned slots with the preceding slots, if + * available (non zero) + */ + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; + i--) + mem->slots[i].list = ++count; + area->used -= nslots; + spin_unlock_irqrestore(&area->lock, flags); + + dec_used(dev->dma_io_tlb_mem, nslots); } -void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * swiotlb_del_transient() - delete a transient memory pool + * @dev: Device which mapped the buffer. + * @tlb_addr: Physical address within a bounce buffer. + * + * Check whether the address belongs to a transient SWIOTLB memory pool. + * If yes, then delete the pool. + * + * Return: %true if @tlb_addr belonged to a transient pool that was released. + */ +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) { - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; + struct io_tlb_pool *pool; - if (orig_addr == INVALID_PHYS_ADDR) + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool->transient) + return false; + + dec_used(dev->dma_io_tlb_mem, pool->nslabs); + swiotlb_del_pool(dev, pool); + dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs); + return true; +} + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static inline bool swiotlb_del_transient(struct device *dev, + phys_addr_t tlb_addr) +{ + return false; +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +/* + * tlb_addr is the physical address of the bounce buffer to unmap. + */ +void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * First, sync the memory before unmapping the entry + */ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + + if (swiotlb_del_transient(dev, tlb_addr)) return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); + swiotlb_release_slots(dev, tlb_addr); +} - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_FROM_DEVICE); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } +void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) +{ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + else + BUG_ON(dir != DMA_FROM_DEVICE); +} + +void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir) +{ + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); + else + BUG_ON(dir != DMA_TO_DEVICE); } /* @@ -666,19 +1541,17 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, phys_addr_t swiotlb_addr; dma_addr_t dma_addr; - trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, - swiotlb_force); + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); - swiotlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), - paddr, size, size, dir, attrs); + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, + attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ - dma_addr = __phys_to_dma(dev, swiotlb_addr); + dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, + swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", @@ -693,30 +1566,278 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, size_t swiotlb_max_mapping_size(struct device *dev) { - return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; -} + int min_align_mask = dma_get_min_align_mask(dev); + int min_align = 0; -bool is_swiotlb_active(void) -{ /* - * When SWIOTLB is initialized, even if io_tlb_start points to physical - * address zero, io_tlb_end surely doesn't. + * swiotlb_find_slots() skips slots according to + * min align mask. This affects max mapping size. + * Take it into acount here. */ - return io_tlb_end != 0; + if (min_align_mask) + min_align = roundup(min_align_mask, IO_TLB_SIZE); + + return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; +} + +/** + * is_swiotlb_allocated() - check if the default software IO TLB is initialized + */ +bool is_swiotlb_allocated(void) +{ + return io_tlb_default_mem.nslabs; +} + +bool is_swiotlb_active(struct device *dev) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + + return mem && mem->nslabs; +} + +/** + * default_swiotlb_base() - get the base address of the default SWIOTLB + * + * Get the lowest physical address used by the default software IO TLB pool. + */ +phys_addr_t default_swiotlb_base(void) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + io_tlb_default_mem.can_grow = false; +#endif + return io_tlb_default_mem.defpool.start; +} + +/** + * default_swiotlb_limit() - get the address limit of the default SWIOTLB + * + * Get the highest physical address used by the default software IO TLB pool. + */ +phys_addr_t default_swiotlb_limit(void) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + return io_tlb_default_mem.phys_limit; +#else + return io_tlb_default_mem.defpool.end - 1; +#endif } #ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_SWIOTLB_DYNAMIC +static unsigned long mem_transient_used(struct io_tlb_mem *mem) +{ + return atomic_long_read(&mem->transient_nslabs); +} + +static int io_tlb_transient_used_get(void *data, u64 *val) +{ + struct io_tlb_mem *mem = data; + + *val = mem_transient_used(mem); + return 0; +} -static int __init swiotlb_create_debugfs(void) +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get, + NULL, "%llu\n"); +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +static int io_tlb_used_get(void *data, u64 *val) { - struct dentry *root; + struct io_tlb_mem *mem = data; - root = debugfs_create_dir("swiotlb", NULL); - debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs); - debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used); + *val = mem_used(mem); return 0; } -late_initcall(swiotlb_create_debugfs); +static int io_tlb_hiwater_get(void *data, u64 *val) +{ + struct io_tlb_mem *mem = data; + + *val = atomic_long_read(&mem->used_hiwater); + return 0; +} +static int io_tlb_hiwater_set(void *data, u64 val) +{ + struct io_tlb_mem *mem = data; + + /* Only allow setting to zero */ + if (val != 0) + return -EINVAL; + + atomic_long_set(&mem->used_hiwater, val); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, + io_tlb_hiwater_set, "%llu\n"); + +static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, + const char *dirname) +{ + mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); + if (!mem->nslabs) + return; + + debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); + debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem, + &fops_io_tlb_used); + debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, + &fops_io_tlb_hiwater); +#ifdef CONFIG_SWIOTLB_DYNAMIC + debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, + mem, &fops_io_tlb_transient_used); +#endif +} + +static int __init swiotlb_create_default_debugfs(void) +{ + swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb"); + return 0; +} + +late_initcall(swiotlb_create_default_debugfs); + +#else /* !CONFIG_DEBUG_FS */ + +static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, + const char *dirname) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +#ifdef CONFIG_DMA_RESTRICTED_POOL + +struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + phys_addr_t tlb_addr; + unsigned int align; + int index; + + if (!mem) + return NULL; + + align = (1 << (get_order(size) + PAGE_SHIFT)) - 1; + index = swiotlb_find_slots(dev, 0, size, align, &pool); + if (index == -1) + return NULL; + + tlb_addr = slot_addr(pool->start, index); + if (unlikely(!PAGE_ALIGNED(tlb_addr))) { + dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", + &tlb_addr); + swiotlb_release_slots(dev, tlb_addr); + return NULL; + } + + return pfn_to_page(PFN_DOWN(tlb_addr)); +} + +bool swiotlb_free(struct device *dev, struct page *page, size_t size) +{ + phys_addr_t tlb_addr = page_to_phys(page); + + if (!is_swiotlb_buffer(dev, tlb_addr)) + return false; + + swiotlb_release_slots(dev, tlb_addr); + + return true; +} + +static int rmem_swiotlb_device_init(struct reserved_mem *rmem, + struct device *dev) +{ + struct io_tlb_mem *mem = rmem->priv; + unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; + + /* Set Per-device io tlb area to one */ + unsigned int nareas = 1; + + if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) { + dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping."); + return -EINVAL; + } + + /* + * Since multiple devices can share the same pool, the private data, + * io_tlb_mem struct, will be initialized by the first device attached + * to it. + */ + if (!mem) { + struct io_tlb_pool *pool; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return -ENOMEM; + pool = &mem->defpool; + + pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL); + if (!pool->slots) { + kfree(mem); + return -ENOMEM; + } + + pool->areas = kcalloc(nareas, sizeof(*pool->areas), + GFP_KERNEL); + if (!pool->areas) { + kfree(pool->slots); + kfree(mem); + return -ENOMEM; + } + + set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), + rmem->size >> PAGE_SHIFT); + swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs, + false, nareas); + mem->force_bounce = true; + mem->for_alloc = true; +#ifdef CONFIG_SWIOTLB_DYNAMIC + spin_lock_init(&mem->lock); + INIT_LIST_HEAD_RCU(&mem->pools); #endif + add_mem_pool(mem, pool); + + rmem->priv = mem; + + swiotlb_create_debugfs_files(mem, rmem->name); + } + + dev->dma_io_tlb_mem = mem; + + return 0; +} + +static void rmem_swiotlb_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev->dma_io_tlb_mem = &io_tlb_default_mem; +} + +static const struct reserved_mem_ops rmem_swiotlb_ops = { + .device_init = rmem_swiotlb_device_init, + .device_release = rmem_swiotlb_device_release, +}; + +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "linux,cma-default", NULL) || + of_get_flat_dt_prop(node, "linux,dma-default", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + rmem->ops = &rmem_swiotlb_ops; + pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + return 0; +} + +RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); +#endif /* CONFIG_DMA_RESTRICTED_POOL */ diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c deleted file mode 100644 index ebe128833af7..000000000000 --- a/kernel/dma/virt.c +++ /dev/null @@ -1,59 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map to virtual addresses without flushing memory. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/scatterlist.h> - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, -}; -EXPORT_SYMBOL(dma_virt_ops); |