diff options
Diffstat (limited to 'kernel/dma')
-rw-r--r-- | kernel/dma/Kconfig | 101 | ||||
-rw-r--r-- | kernel/dma/Makefile | 6 | ||||
-rw-r--r-- | kernel/dma/coherent.c | 190 | ||||
-rw-r--r-- | kernel/dma/contiguous.c | 113 | ||||
-rw-r--r-- | kernel/dma/debug.c | 320 | ||||
-rw-r--r-- | kernel/dma/debug.h | 24 | ||||
-rw-r--r-- | kernel/dma/direct.c | 407 | ||||
-rw-r--r-- | kernel/dma/direct.h | 24 | ||||
-rw-r--r-- | kernel/dma/dummy.c | 23 | ||||
-rw-r--r-- | kernel/dma/map_benchmark.c | 92 | ||||
-rw-r--r-- | kernel/dma/mapping.c | 509 | ||||
-rw-r--r-- | kernel/dma/ops_helpers.c | 26 | ||||
-rw-r--r-- | kernel/dma/pool.c | 18 | ||||
-rw-r--r-- | kernel/dma/remap.c | 11 | ||||
-rw-r--r-- | kernel/dma/swiotlb.c | 1993 |
15 files changed, 2816 insertions, 1041 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 479fc145acfc..31cfdb6b4bc3 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -8,8 +8,7 @@ config HAS_DMA depends on !NO_DMA default y -config DMA_OPS - depends on HAS_DMA +config DMA_OPS_HELPERS bool # @@ -24,6 +23,9 @@ config DMA_OPS_BYPASS config ARCH_HAS_DMA_MAP_DIRECT bool +config NEED_SG_DMA_FLAGS + bool + config NEED_SG_DMA_LENGTH bool @@ -33,16 +35,13 @@ config NEED_DMA_MAP_STATE config ARCH_DMA_ADDR_T_64BIT def_bool 64BIT || PHYS_ADDR_T_64BIT -config ARCH_HAS_DMA_COHERENCE_H - bool - config ARCH_HAS_DMA_SET_MASK bool # # Select this option if the architecture needs special handling for # DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what -# people thing of when saying write combine, so very few platforms should +# people think of when saying write combine, so very few platforms should # need to enable this. # config ARCH_HAS_DMA_WRITE_COMBINE @@ -79,10 +78,52 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool +# +# Select this option if the architecture assumes DMA devices are coherent +# by default. +# +config ARCH_DMA_DEFAULT_COHERENT + bool + config SWIOTLB bool select NEED_DMA_MAP_STATE +config SWIOTLB_DYNAMIC + bool "Dynamic allocation of DMA bounce buffers" + default n + depends on SWIOTLB + help + This enables dynamic resizing of the software IO TLB. The kernel + starts with one memory pool at boot and it will allocate additional + pools as needed. To reduce run-time kernel memory requirements, you + may have to specify a smaller size of the initial pool using + "swiotlb=" on the kernel command line. + + If unsure, say N. + +config DMA_BOUNCE_UNALIGNED_KMALLOC + bool + depends on SWIOTLB + +config DMA_NEED_SYNC + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \ + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || \ + ARCH_HAS_DMA_OPS || SWIOTLB + +config DMA_RESTRICTED_POOL + bool "DMA Restricted Pool" + depends on OF && OF_RESERVED_MEM && SWIOTLB + help + This enables support for restricted DMA pools which provide a level of + DMA memory protection on systems with limited hardware protection + capabilities, such as those lacking an IOMMU. + + For more information see + <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt> + and <kernel/dma/swiotlb.c>. + If unsure, say "n". + # # Should be selected if we can mmap non-coherent mappings to userspace. # The only thing that is really required is a way to set an uncached bit @@ -96,15 +137,25 @@ config DMA_COHERENT_POOL select GENERIC_ALLOCATOR bool -config DMA_REMAP +config DMA_GLOBAL_POOL + select DMA_DECLARE_COHERENT + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP bool - depends on MMU - select DMA_NONCOHERENT_MMAP config DMA_DIRECT_REMAP bool - select DMA_REMAP select DMA_COHERENT_POOL + select DMA_NONCOHERENT_MMAP + +# +# Fallback to arch code for DMA allocations. This should eventually go away. +# +config ARCH_HAS_DMA_ALLOC + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP + depends on !DMA_GLOBAL_POOL + bool config DMA_CMA bool "DMA Contiguous Memory Allocator" @@ -122,15 +173,16 @@ config DMA_CMA if DMA_CMA -config DMA_PERNUMA_CMA - bool "Enable separate DMA Contiguous Memory Area for each NUMA Node" - default NUMA && ARM64 +config DMA_NUMA_CMA + bool "Enable separate DMA Contiguous Memory Area for NUMA Node" + depends on NUMA help - Enable this option to get pernuma CMA areas so that devices like - ARM64 SMMU can get local memory by DMA coherent APIs. + Enable this option to get numa CMA areas so that NUMA devices + can get local memory by DMA coherent APIs. You can set the size of pernuma CMA by specifying "cma_pernuma=size" - on the kernel's command line. + or set the node id and its size of CMA by specifying "numa_cma= + <node>:size[,<node>:size]" on the kernel's command line. comment "Default contiguous memory area size:" @@ -208,23 +260,6 @@ config DMA_API_DEBUG If unsure, say N. -config DMA_API_DEBUG_SG - bool "Debug DMA scatter-gather usage" - default y - depends on DMA_API_DEBUG - help - Perform extra checking that callers of dma_map_sg() have respected the - appropriate segment length/boundary limits for the given device when - preparing DMA scatterlists. - - This is particularly likely to have been overlooked in cases where the - dma_map_sg() API is used for general bulk mapping of pages rather than - preparing literal scatter-gather descriptors, where there is a risk of - unexpected behaviour from DMA API implementations if the scatterlist - is technically out-of-spec. - - If unsure, say N. - config DMA_MAP_BENCHMARK bool "Enable benchmarking of streaming DMA mapping" depends on DEBUG_FS diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 0dd65ec1d234..6977033444a3 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o -obj-$(CONFIG_DMA_OPS) += ops_helpers.o -obj-$(CONFIG_DMA_OPS) += dummy.o +obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o +obj-$(CONFIG_ARCH_HAS_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o -obj-$(CONFIG_DMA_REMAP) += remap.o +obj-$(CONFIG_MMU) += remap.o obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 5b5b6c7ec7f2..3b2bdca9f1d4 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -20,8 +20,6 @@ struct dma_coherent_mem { bool use_dev_dma_pfn_offset; }; -static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init; - static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev) { if (dev && dev->dma_mem) @@ -37,60 +35,52 @@ static inline dma_addr_t dma_get_device_base(struct device *dev, return mem->device_base; } -static int dma_init_coherent_memory(phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size, - struct dma_coherent_mem **mem) +static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size, bool use_dma_pfn_offset) { - struct dma_coherent_mem *dma_mem = NULL; - void *mem_base = NULL; + struct dma_coherent_mem *dma_mem; int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - int ret; + void *mem_base; - if (!size) { - ret = -EINVAL; - goto out; - } + if (!size) + return ERR_PTR(-EINVAL); mem_base = memremap(phys_addr, size, MEMREMAP_WC); - if (!mem_base) { - ret = -EINVAL; - goto out; - } + if (!mem_base) + return ERR_PTR(-EINVAL); + dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dma_mem) { - ret = -ENOMEM; - goto out; - } - dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dma_mem->bitmap) { - ret = -ENOMEM; - goto out; - } + if (!dma_mem) + goto out_unmap_membase; + dma_mem->bitmap = bitmap_zalloc(pages, GFP_KERNEL); + if (!dma_mem->bitmap) + goto out_free_dma_mem; dma_mem->virt_base = mem_base; dma_mem->device_base = device_addr; dma_mem->pfn_base = PFN_DOWN(phys_addr); dma_mem->size = pages; + dma_mem->use_dev_dma_pfn_offset = use_dma_pfn_offset; spin_lock_init(&dma_mem->spinlock); - *mem = dma_mem; - return 0; + return dma_mem; -out: +out_free_dma_mem: kfree(dma_mem); - if (mem_base) - memunmap(mem_base); - return ret; +out_unmap_membase: + memunmap(mem_base); + pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %zd MiB\n", + &phys_addr, size / SZ_1M); + return ERR_PTR(-ENOMEM); } -static void dma_release_coherent_memory(struct dma_coherent_mem *mem) +static void _dma_release_coherent_memory(struct dma_coherent_mem *mem) { if (!mem) return; memunmap(mem->virt_base); - kfree(mem->bitmap); + bitmap_free(mem->bitmap); kfree(mem); } @@ -111,7 +101,7 @@ static int dma_assign_coherent_memory(struct device *dev, * Declare a region of memory to be handed out by dma_alloc_coherent() when it * is asked for coherent memory for this device. This shall only be used * from platform code, usually based on the device tree description. - * + * * phys_addr is the CPU physical address to which the memory is currently * assigned (this will be ioremapped so the CPU can access the region). * @@ -130,16 +120,24 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, struct dma_coherent_mem *mem; int ret; - ret = dma_init_coherent_memory(phys_addr, device_addr, size, &mem); - if (ret) - return ret; + mem = dma_init_coherent_memory(phys_addr, device_addr, size, false); + if (IS_ERR(mem)) + return PTR_ERR(mem); ret = dma_assign_coherent_memory(dev, mem); if (ret) - dma_release_coherent_memory(mem); + _dma_release_coherent_memory(mem); return ret; } +void dma_release_coherent_memory(struct device *dev) +{ + if (dev) { + _dma_release_coherent_memory(dev->dma_mem); + dev->dma_mem = NULL; + } +} + static void *__dma_alloc_from_coherent(struct device *dev, struct dma_coherent_mem *mem, ssize_t size, dma_addr_t *dma_handle) @@ -198,16 +196,6 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, return 1; } -void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle) -{ - if (!dma_coherent_default_memory) - return NULL; - - return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size, - dma_handle); -} - static int __dma_release_from_coherent(struct dma_coherent_mem *mem, int order, void *vaddr) { @@ -243,15 +231,6 @@ int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr) return __dma_release_from_coherent(mem, order, vaddr); } -int dma_release_from_global_coherent(int order, void *vaddr) -{ - if (!dma_coherent_default_memory) - return 0; - - return __dma_release_from_coherent(dma_coherent_default_memory, order, - vaddr); -} - static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem, struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) { @@ -297,6 +276,28 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret); } +#ifdef CONFIG_DMA_GLOBAL_POOL +static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init; + +void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle) +{ + if (!dma_coherent_default_memory) + return NULL; + + return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size, + dma_handle); +} + +int dma_release_from_global_coherent(int order, void *vaddr) +{ + if (!dma_coherent_default_memory) + return 0; + + return __dma_release_from_coherent(dma_coherent_default_memory, order, + vaddr); +} + int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, size_t size, int *ret) { @@ -307,6 +308,19 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, vaddr, size, ret); } +int dma_init_global_coherent(phys_addr_t phys_addr, size_t size) +{ + struct dma_coherent_mem *mem; + + mem = dma_init_coherent_memory(phys_addr, phys_addr, size, true); + if (IS_ERR(mem)) + return PTR_ERR(mem); + dma_coherent_default_memory = mem; + pr_info("DMA: default coherent area is set\n"); + return 0; +} +#endif /* CONFIG_DMA_GLOBAL_POOL */ + /* * Support for reserved memory regions defined in device tree */ @@ -315,25 +329,23 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr, #include <linux/of_fdt.h> #include <linux/of_reserved_mem.h> -static struct reserved_mem *dma_reserved_default_memory __initdata; +#ifdef CONFIG_DMA_GLOBAL_POOL +static phys_addr_t dma_reserved_default_memory_base __initdata; +static phys_addr_t dma_reserved_default_memory_size __initdata; +#endif static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - struct dma_coherent_mem *mem = rmem->priv; - int ret; - - if (!mem) { - ret = dma_init_coherent_memory(rmem->base, rmem->base, - rmem->size, &mem); - if (ret) { - pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n", - &rmem->base, (unsigned long)rmem->size / SZ_1M); - return ret; - } + if (!rmem->priv) { + struct dma_coherent_mem *mem; + + mem = dma_init_coherent_memory(rmem->base, rmem->base, + rmem->size, true); + if (IS_ERR(mem)) + return PTR_ERR(mem); + rmem->priv = mem; } - mem->use_dev_dma_pfn_offset = true; - rmem->priv = mem; - dma_assign_coherent_memory(dev, mem); + dma_assign_coherent_memory(dev, rmem->priv); return 0; } @@ -361,11 +373,14 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) pr_err("Reserved memory: regions without no-map are not yet supported\n"); return -EINVAL; } +#endif +#ifdef CONFIG_DMA_GLOBAL_POOL if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) { - WARN(dma_reserved_default_memory, + WARN(dma_reserved_default_memory_size, "Reserved memory: region for default DMA coherent area is redefined\n"); - dma_reserved_default_memory = rmem; + dma_reserved_default_memory_base = rmem->base; + dma_reserved_default_memory_size = rmem->size; } #endif @@ -375,31 +390,16 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) return 0; } +#ifdef CONFIG_DMA_GLOBAL_POOL static int __init dma_init_reserved_memory(void) { - const struct reserved_mem_ops *ops; - int ret; - - if (!dma_reserved_default_memory) + if (!dma_reserved_default_memory_size) return -ENOMEM; - - ops = dma_reserved_default_memory->ops; - - /* - * We rely on rmem_dma_device_init() does not propagate error of - * dma_assign_coherent_memory() for "NULL" device. - */ - ret = ops->device_init(dma_reserved_default_memory, NULL); - - if (!ret) { - dma_coherent_default_memory = dma_reserved_default_memory->priv; - pr_info("DMA: default coherent area is set\n"); - } - - return ret; + return dma_init_global_coherent(dma_reserved_default_memory_base, + dma_reserved_default_memory_size); } - core_initcall(dma_init_reserved_memory); +#endif /* CONFIG_DMA_GLOBAL_POOL */ RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup); #endif diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 3d63d91cba5c..055da410ac71 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -37,12 +37,6 @@ #define pr_fmt(fmt) "cma: " fmt -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - #include <asm/page.h> #include <linux/memblock.h> @@ -50,6 +44,7 @@ #include <linux/sizes.h> #include <linux/dma-map-ops.h> #include <linux/cma.h> +#include <linux/nospec.h> #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES @@ -96,11 +91,44 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); -#ifdef CONFIG_DMA_PERNUMA_CMA +#ifdef CONFIG_DMA_NUMA_CMA +static struct cma *dma_contiguous_numa_area[MAX_NUMNODES]; +static phys_addr_t numa_cma_size[MAX_NUMNODES] __initdata; static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES]; static phys_addr_t pernuma_size_bytes __initdata; +static int __init early_numa_cma(char *p) +{ + int nid, count = 0; + unsigned long tmp; + char *s = p; + + while (*s) { + if (sscanf(s, "%lu%n", &tmp, &count) != 1) + break; + + if (s[count] == ':') { + if (tmp >= MAX_NUMNODES) + break; + nid = array_index_nospec(tmp, MAX_NUMNODES); + + s += count + 1; + tmp = memparse(s, &s); + numa_cma_size[nid] = tmp; + + if (*s == ',') + s++; + else + break; + } else + break; + } + + return 0; +} +early_param("numa_cma", early_numa_cma); + static int __init early_cma_pernuma(char *p) { pernuma_size_bytes = memparse(p, &p); @@ -127,32 +155,49 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif -#ifdef CONFIG_DMA_PERNUMA_CMA -void __init dma_pernuma_cma_reserve(void) +#ifdef CONFIG_DMA_NUMA_CMA +static void __init dma_numa_cma_reserve(void) { int nid; - if (!pernuma_size_bytes) - return; - - for_each_online_node(nid) { + for_each_node(nid) { int ret; char name[CMA_MAX_NAME]; - struct cma **cma = &dma_contiguous_pernuma_area[nid]; - - snprintf(name, sizeof(name), "pernuma%d", nid); - ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, - 0, false, name, cma, nid); - if (ret) { - pr_warn("%s: reservation failed: err %d, node %d", __func__, - ret, nid); + struct cma **cma; + + if (!node_online(nid)) { + if (pernuma_size_bytes || numa_cma_size[nid]) + pr_warn("invalid node %d specified\n", nid); continue; } - pr_debug("%s: reserved %llu MiB on node %d\n", __func__, - (unsigned long long)pernuma_size_bytes / SZ_1M, nid); + if (pernuma_size_bytes) { + + cma = &dma_contiguous_pernuma_area[nid]; + snprintf(name, sizeof(name), "pernuma%d", nid); + ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, + 0, false, name, cma, nid); + if (ret) + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + } + + if (numa_cma_size[nid]) { + + cma = &dma_contiguous_numa_area[nid]; + snprintf(name, sizeof(name), "numa%d", nid); + ret = cma_declare_contiguous_nid(0, numa_cma_size[nid], 0, 0, 0, false, + name, cma, nid); + if (ret) + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + } } } +#else +static inline void __init dma_numa_cma_reserve(void) +{ +} #endif /** @@ -171,6 +216,8 @@ void __init dma_contiguous_reserve(phys_addr_t limit) phys_addr_t selected_limit = limit; bool fixed = false; + dma_numa_cma_reserve(); + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); if (size_cmdline != -1) { @@ -303,7 +350,7 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { -#ifdef CONFIG_DMA_PERNUMA_CMA +#ifdef CONFIG_DMA_NUMA_CMA int nid = dev_to_node(dev); #endif @@ -315,7 +362,7 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) if (size <= PAGE_SIZE) return NULL; -#ifdef CONFIG_DMA_PERNUMA_CMA +#ifdef CONFIG_DMA_NUMA_CMA if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) { struct cma *cma = dma_contiguous_pernuma_area[nid]; struct page *page; @@ -325,6 +372,13 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) if (page) return page; } + + cma = dma_contiguous_numa_area[nid]; + if (cma) { + page = cma_alloc_aligned(cma, size, gfp); + if (page) + return page; + } } #endif if (!dma_contiguous_default_area) @@ -356,10 +410,13 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) /* * otherwise, page is from either per-numa cma or default cma */ -#ifdef CONFIG_DMA_PERNUMA_CMA +#ifdef CONFIG_DMA_NUMA_CMA if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)], page, count)) return; + if (cma_release(dma_contiguous_numa_area[page_to_nid(page)], + page, count)) + return; #endif if (cma_release(dma_contiguous_default_area, page, count)) return; @@ -399,8 +456,6 @@ static const struct reserved_mem_ops rmem_cma_ops = { static int __init rmem_cma_setup(struct reserved_mem *rmem) { - phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); - phys_addr_t mask = align - 1; unsigned long node = rmem->fdt_node; bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); struct cma *cma; @@ -416,7 +471,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; - if ((rmem->base & mask) || (rmem->size & mask)) { + if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) { pr_err("Reserved memory: incorrect alignment of CMA region\n"); return -EINVAL; } diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 14de1271463f..e43c6de2bce4 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -53,15 +53,16 @@ enum map_err_types { * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping * @list: node on pre-allocated free_entries list * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @dev_addr: dma address * @size: length of the mapping * @type: single, page, sg, coherent * @direction: enum dma_data_direction * @sg_call_ents: 'nents' from dma_map_sg * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn + * @paddr: physical start address of the mapping * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected + * @stack_len: number of backtrace entries in @stack_entries + * @stack_entries: stack of backtrace history */ struct dma_debug_entry { struct list_head list; @@ -72,8 +73,7 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; - unsigned long pfn; - size_t offset; + phys_addr_t paddr; enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE unsigned int stack_len; @@ -138,7 +138,7 @@ static const char *const maperr2str[] = { static const char *type2name[] = { [dma_debug_single] = "single", - [dma_debug_sg] = "scather-gather", + [dma_debug_sg] = "scatter-gather", [dma_debug_coherent] = "coherent", [dma_debug_resource] = "resource", }; @@ -350,11 +350,10 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, unsigned long *flags) { - unsigned int max_range = dma_get_max_seg_size(ref->dev); struct dma_debug_entry *entry, index = *ref; - unsigned int range = 0; + int limit = min(HASH_SIZE, (index.dev_addr >> HASH_FN_SHIFT) + 1); - while (range <= max_range) { + for (int i = 0; i < limit; i++) { entry = __hash_bucket_find(*bucket, ref, containing_match); if (entry) @@ -364,7 +363,6 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, * Nothing found, go back a hash bucket */ put_hash_bucket(*bucket, *flags); - range += (1 << HASH_FN_SHIFT); index.dev_addr -= (1 << HASH_FN_SHIFT); *bucket = get_hash_bucket(&index, flags); } @@ -389,45 +387,6 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - -/* - * Dump mapping entries for debugging purposes - */ -void debug_dma_dump_mappings(struct device *dev) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!dev || dev == entry->dev) { - dev_info(entry->dev, - "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - } - - spin_unlock_irqrestore(&bucket->lock, flags); - cond_resched(); - } -} - /* * For each mapping (initial cacheline in the case of * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a @@ -447,8 +406,11 @@ void debug_dma_dump_mappings(struct device *dev) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. + * + * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end + * up right back in the DMA debugging code, leading to a deadlock. */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) @@ -456,8 +418,8 @@ static DEFINE_SPINLOCK(radix_lock); static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) { - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); + return ((entry->paddr >> PAGE_SHIFT) << CACHELINE_PER_PAGE_SHIFT) + + (offset_in_page(entry->paddr) >> L1_CACHE_SHIFT); } static int active_cacheline_read_overlap(phys_addr_t cln) @@ -549,10 +511,74 @@ static void active_cacheline_remove(struct dma_debug_entry *entry) } /* + * Dump mappings entries on kernel space for debugging purposes + */ +void debug_dma_dump_mappings(struct device *dev) +{ + int idx; + phys_addr_t cln; + + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + list_for_each_entry(entry, &bucket->list, list) { + if (!dev || dev == entry->dev) { + cln = to_cacheline_number(entry); + dev_info(entry->dev, + "%s idx %d P=%pa D=%llx L=%llx cln=%pa %s %s\n", + type2name[entry->type], idx, + &entry->paddr, entry->dev_addr, + entry->size, &cln, + dir2name[entry->direction], + maperr2str[entry->map_err_type]); + } + } + spin_unlock_irqrestore(&bucket->lock, flags); + + cond_resched(); + } +} + +/* + * Dump mappings entries on user space via debugfs + */ +static int dump_show(struct seq_file *seq, void *v) +{ + int idx; + phys_addr_t cln; + + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + list_for_each_entry(entry, &bucket->list, list) { + cln = to_cacheline_number(entry); + seq_printf(seq, + "%s %s %s idx %d P=%pa D=%llx L=%llx cln=%pa %s %s\n", + dev_driver_string(entry->dev), + dev_name(entry->dev), + type2name[entry->type], idx, + &entry->paddr, entry->dev_addr, + entry->size, &cln, + dir2name[entry->direction], + maperr2str[entry->map_err_type]); + } + spin_unlock_irqrestore(&bucket->lock, flags); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(dump); + +/* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. */ -static void add_dma_entry(struct dma_debug_entry *entry) +static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) { struct hash_bucket *bucket; unsigned long flags; @@ -564,13 +590,12 @@ static void add_dma_entry(struct dma_debug_entry *entry) rc = active_cacheline_insert(entry); if (rc == -ENOMEM) { - pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); + pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + err_printk(entry->dev, entry, + "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } - - /* TODO: report -EEXIST errors here as overlapping mappings are - * not supported by the DMA API - */ } static int dma_debug_create_entries(gfp_t gfp) @@ -606,15 +631,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void) return entry; } -static void __dma_entry_alloc_check_leak(void) +/* + * This should be called outside of free_entries_lock scope to avoid potential + * deadlocks with serial consoles that use DMA. + */ +static void __dma_entry_alloc_check_leak(u32 nr_entries) { - u32 tmp = nr_total_entries % nr_prealloc_entries; + u32 tmp = nr_entries % nr_prealloc_entries; /* Shout each time we tick over some multiple of the initial pool */ if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) { pr_info("dma_debug_entry pool grown to %u (%u00%%)\n", - nr_total_entries, - (nr_total_entries / nr_prealloc_entries)); + nr_entries, + (nr_entries / nr_prealloc_entries)); } } @@ -625,8 +654,10 @@ static void __dma_entry_alloc_check_leak(void) */ static struct dma_debug_entry *dma_entry_alloc(void) { + bool alloc_check_leak = false; struct dma_debug_entry *entry; unsigned long flags; + u32 nr_entries; spin_lock_irqsave(&free_entries_lock, flags); if (num_free_entries == 0) { @@ -636,13 +667,17 @@ static struct dma_debug_entry *dma_entry_alloc(void) pr_err("debugging out of memory - disabling\n"); return NULL; } - __dma_entry_alloc_check_leak(); + alloc_check_leak = true; + nr_entries = nr_total_entries; } entry = __dma_entry_alloc(); spin_unlock_irqrestore(&free_entries_lock, flags); + if (alloc_check_leak) + __dma_entry_alloc_check_leak(nr_entries); + #ifdef CONFIG_STACKTRACE entry->stack_len = stack_trace_save(entry->stack_entries, ARRAY_SIZE(entry->stack_entries), @@ -767,34 +802,7 @@ static const struct file_operations filter_fops = { .llseek = default_llseek, }; -static int dump_show(struct seq_file *seq, void *v) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - list_for_each_entry(entry, &bucket->list, list) { - seq_printf(seq, - "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx %s %s\n", - dev_name(entry->dev), - dev_driver_string(entry->dev), - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - spin_unlock_irqrestore(&bucket->lock, flags); - } - return 0; -} -DEFINE_SHOW_ATTRIBUTE(dump); - -static void dma_debug_fs_init(void) +static int __init dma_debug_fs_init(void) { struct dentry *dentry = debugfs_create_dir("dma-api", NULL); @@ -807,7 +815,10 @@ static void dma_debug_fs_init(void) debugfs_create_u32("nr_total_entries", 0444, dentry, &nr_total_entries); debugfs_create_file("driver_filter", 0644, dentry, NULL, &filter_fops); debugfs_create_file("dump", 0444, dentry, NULL, &dump_fops); + + return 0; } +core_initcall_sync(dma_debug_fs_init); static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) { @@ -859,7 +870,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti return 0; } -void dma_debug_add_bus(struct bus_type *bus) +void dma_debug_add_bus(const struct bus_type *bus) { struct notifier_block *nb; @@ -892,8 +903,6 @@ static int dma_debug_init(void) spin_lock_init(&dma_entry_hash[i].lock); } - dma_debug_fs_init(); - nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES); for (i = 0; i < nr_pages; ++i) dma_debug_create_entries(GFP_KERNEL); @@ -927,7 +936,7 @@ static __init int dma_debug_cmdline(char *str) global_disable = true; } - return 0; + return 1; } static __init int dma_debug_entries_cmdline(char *str) @@ -936,7 +945,7 @@ static __init int dma_debug_entries_cmdline(char *str) return -EINVAL; if (!get_option(&str, &nr_prealloc_entries)) nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; + return 1; } __setup("dma_debug=", dma_debug_cmdline); @@ -984,16 +993,16 @@ static void check_unmap(struct dma_debug_entry *ref) "[mapped as %s] [unmapped as %s]\n", ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { + } else if (entry->type == dma_debug_coherent && + ref->paddr != entry->paddr) { err_printk(ref->dev, entry, "device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", + "[cpu alloc address=0x%pa] " + "[cpu free address=0x%pa]", ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); + &entry->paddr, + &ref->paddr); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -1033,9 +1042,13 @@ static void check_unmap(struct dma_debug_entry *ref) } hash_bucket_del(entry); - dma_entry_free(entry); - put_hash_bucket(bucket, flags); + + /* + * Free the entry outside of bucket_lock to avoid ABBA deadlocks + * between that and radix_lock. + */ + dma_entry_free(entry); } static void check_for_stack(struct device *dev, @@ -1066,20 +1079,10 @@ static void check_for_stack(struct device *dev, } } -static inline bool overlap(void *addr, unsigned long len, void *start, void *end) -{ - unsigned long a1 = (unsigned long)addr; - unsigned long b1 = a1 + len; - unsigned long a2 = (unsigned long)start; - unsigned long b2 = (unsigned long)end; - - return !(b1 <= a2 || a1 >= b2); -} - static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) { - if (overlap(addr, len, _stext, _etext) || - overlap(addr, len, __start_rodata, __end_rodata)) + if (memory_intersects(_stext, _etext, addr, len) || + memory_intersects(__start_rodata, __end_rodata, addr, len)) err_printk(dev, NULL, "device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); } @@ -1160,7 +1163,6 @@ out: static void check_sg_segment(struct device *dev, struct scatterlist *sg) { -#ifdef CONFIG_DMA_API_DEBUG_SG unsigned int max_seg = dma_get_max_seg_size(dev); u64 start, end, boundary = dma_get_seg_boundary(dev); @@ -1181,7 +1183,6 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) if ((start ^ end) & ~boundary) err_printk(dev, NULL, "mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", start, end, boundary); -#endif } void debug_dma_map_single(struct device *dev, const void *addr, @@ -1201,7 +1202,8 @@ void debug_dma_map_single(struct device *dev, const void *addr, EXPORT_SYMBOL(debug_dma_map_single); void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr) + size_t size, int direction, dma_addr_t dma_addr, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1217,8 +1219,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_single; - entry->pfn = page_to_pfn(page); - entry->offset = offset; + entry->paddr = page_to_phys(page) + offset; entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1232,7 +1233,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, check_for_illegal_area(dev, addr, size); } - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -1273,13 +1274,13 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) } EXPORT_SYMBOL(debug_dma_mapping_error); -void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, +void debug_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) { struct dma_debug_entry ref = { .type = dma_debug_single, .dev = dev, - .dev_addr = addr, + .dev_addr = dma_addr, .size = size, .direction = direction, }; @@ -1290,7 +1291,8 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, } void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) + int nents, int mapped_ents, int direction, + unsigned long attrs) { struct dma_debug_entry *entry; struct scatterlist *s; @@ -1299,6 +1301,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, if (unlikely(dma_debug_disabled())) return; + for_each_sg(sg, s, nents, i) { + check_for_stack(dev, sg_page(s), s->offset); + if (!PageHighMem(sg_page(s))) + check_for_illegal_area(dev, sg_virt(s), s->length); + } + for_each_sg(sg, s, mapped_ents, i) { entry = dma_entry_alloc(); if (!entry) @@ -1306,23 +1314,16 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset; + entry->paddr = sg_phys(s); entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - check_sg_segment(dev, s); - add_dma_entry(entry); + add_dma_entry(entry, attrs); } } @@ -1359,8 +1360,7 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(s), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1377,8 +1377,21 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, } } +static phys_addr_t virt_to_paddr(void *virt) +{ + struct page *page; + + if (is_vmalloc_addr(virt)) + page = vmalloc_to_page(virt); + else + page = virt_to_page(virt); + + return page_to_phys(page) + offset_in_page(virt); +} + void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) + dma_addr_t dma_addr, void *virt, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1398,27 +1411,21 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->offset = offset_in_page(virt); + entry->paddr = virt_to_paddr(virt); entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; - if (is_vmalloc_addr(virt)) - entry->pfn = vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) + void *virt, dma_addr_t dma_addr) { struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .offset = offset_in_page(virt), - .dev_addr = addr, + .dev_addr = dma_addr, .size = size, .direction = DMA_BIDIRECTIONAL, }; @@ -1427,10 +1434,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); + ref.paddr = virt_to_paddr(virt); if (unlikely(dma_debug_disabled())) return; @@ -1439,7 +1443,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, } void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) + int direction, dma_addr_t dma_addr, + unsigned long attrs) { struct dma_debug_entry *entry; @@ -1452,14 +1457,13 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, entry->type = dma_debug_resource; entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); + entry->paddr = addr; entry->size = size; entry->dev_addr = dma_addr; entry->direction = direction; entry->map_err_type = MAP_ERR_NOT_CHECKED; - add_dma_entry(entry); + add_dma_entry(entry, attrs); } void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, @@ -1530,8 +1534,7 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(s), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1562,8 +1565,7 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(sg), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h index 83643b3010b2..f525197d3cae 100644 --- a/kernel/dma/debug.h +++ b/kernel/dma/debug.h @@ -11,26 +11,30 @@ #ifdef CONFIG_DMA_API_DEBUG extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, - int direction, dma_addr_t dma_addr); + int direction, dma_addr_t dma_addr, + unsigned long attrs); extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction); extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction); + int nents, int mapped_ents, int direction, + unsigned long attrs); extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir); extern void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt); + dma_addr_t dma_addr, void *virt, + unsigned long attrs); extern void debug_dma_free_coherent(struct device *dev, size_t size, void *virt, dma_addr_t addr); extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, int direction, - dma_addr_t dma_addr); + dma_addr_t dma_addr, + unsigned long attrs); extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, size_t size, int direction); @@ -53,7 +57,8 @@ extern void debug_dma_sync_sg_for_device(struct device *dev, #else /* CONFIG_DMA_API_DEBUG */ static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, - int direction, dma_addr_t dma_addr) + int direction, dma_addr_t dma_addr, + unsigned long attrs) { } @@ -63,7 +68,8 @@ static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, } static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) + int nents, int mapped_ents, int direction, + unsigned long attrs) { } @@ -74,7 +80,8 @@ static inline void debug_dma_unmap_sg(struct device *dev, } static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) + dma_addr_t dma_addr, void *virt, + unsigned long attrs) { } @@ -85,7 +92,8 @@ static inline void debug_dma_free_coherent(struct device *dev, size_t size, static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, int direction, - dma_addr_t dma_addr) + dma_addr_t dma_addr, + unsigned long attrs) { } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 002268262c9a..b8fe0b3d0ffb 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -20,7 +20,7 @@ * it for entirely different regions. In that case the arch code needs to * override the variable below for dma-direct to work properly. */ -unsigned int zone_dma_bits __ro_after_init = 24; +u64 zone_dma_limit __ro_after_init = DMA_BIT_MASK(24); static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) @@ -44,10 +44,11 @@ u64 dma_direct_get_required_mask(struct device *dev) return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; } -static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, - u64 *phys_limit) +static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit) { - u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); + u64 dma_limit = min_not_zero( + dev->coherent_dma_mask, + dev->bus_dma_limit); /* * Optimistically try the zone that the physical address mask falls @@ -58,14 +59,14 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * zones. */ *phys_limit = dma_to_phys(dev, dma_limit); - if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + if (*phys_limit <= zone_dma_limit) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) return GFP_DMA32; return 0; } -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); @@ -75,8 +76,47 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } +static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) +{ + if (!force_dma_unencrypted(dev)) + return 0; + return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size)); +} + +static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) +{ + int ret; + + if (!force_dma_unencrypted(dev)) + return 0; + ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); + if (ret) + pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); + return ret; +} + +static void __dma_direct_free_pages(struct device *dev, struct page *page, + size_t size) +{ + if (swiotlb_free(dev, page, size)) + return; + dma_free_contiguous(dev, page, size); +} + +static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size) +{ + struct page *page = swiotlb_alloc(dev, size); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + swiotlb_free(dev, page, size); + return NULL; + } + + return page; +} + static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp) + gfp_t gfp, bool allow_highmem) { int node = dev_to_node(dev); struct page *page = NULL; @@ -84,18 +124,23 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, WARN_ON_ONCE(!PAGE_ALIGNED(size)); - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_limit); + if (is_swiotlb_for_alloc(dev)) + return dma_direct_alloc_swiotlb(dev, size); + + gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_free_contiguous(dev, page, size); - page = NULL; + if (page) { + if (!dma_coherent_ok(dev, page_to_phys(page), size) || + (!allow_highmem && PageHighMem(page))) { + dma_free_contiguous(dev, page, size); + page = NULL; + } } again: if (!page) page = alloc_pages_node(node, gfp, get_order(size)); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_free_contiguous(dev, page, size); + __free_pages(page, get_order(size)); page = NULL; if (IS_ENABLED(CONFIG_ZONE_DMA32) && @@ -114,15 +159,26 @@ again: return page; } +/* + * Check if a potentially blocking operations needs to dip into the atomic + * pools for the given device/gfp. + */ +static bool dma_direct_use_pool(struct device *dev, gfp_t gfp) +{ + return !gfpflags_allow_blocking(gfp) && !is_swiotlb_for_alloc(dev); +} + static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct page *page; - u64 phys_mask; + u64 phys_limit; void *ret; - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_mask); + if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))) + return NULL; + + gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok); if (!page) return NULL; @@ -130,114 +186,128 @@ static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, return ret; } +static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct page *page; + + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); + if (!page) + return NULL; + + /* remove any dirty cache lines on the kernel alias */ + if (!PageHighMem(page)) + arch_dma_prep_coherent(page, size); + + /* return the page pointer as the opaque cookie */ + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return page; +} + void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + bool remap = false, set_uncached = false; struct page *page; void *ret; - int err; size = PAGE_ALIGN(size); if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { - page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); - if (!page) + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) + return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); + + if (!dev_is_dma_coherent(dev)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && + !is_swiotlb_for_alloc(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, + attrs); + + /* + * If there is a global pool, always allocate from it for + * non-coherent devices. + */ + if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL)) + return dma_alloc_from_global_coherent(dev, size, + dma_handle); + + /* + * Otherwise we require the architecture to either be able to + * mark arbitrary parts of the kernel direct mapping uncached, + * or remapped it uncached. + */ + set_uncached = IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED); + remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); + if (!set_uncached && !remap) { + pr_warn_once("coherent DMA allocations not supported on this platform.\n"); return NULL; - /* remove any dirty cache lines on the kernel alias */ - if (!PageHighMem(page)) - arch_dma_prep_coherent(page, size); - *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); - /* return the page pointer as the opaque cookie */ - return page; + } } - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) - return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); - /* - * Remapping or decrypting memory may block. If either is required and - * we can't block, allocate the memory from the atomic pools. + * Remapping or decrypting memory may block, allocate the memory from + * the atomic pools instead if we aren't allowed block. */ - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && - !gfpflags_allow_blocking(gfp) && - (force_dma_unencrypted(dev) || - (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev)))) + if ((remap || force_dma_unencrypted(dev)) && + dma_direct_use_pool(dev, gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ - page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true); if (!page) return NULL; - if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) || - (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) { + /* + * dma_alloc_contiguous can return highmem pages depending on a + * combination the cma= arguments and per-arch setup. These need to be + * remapped to return a kernel virtual address. + */ + if (PageHighMem(page)) { + remap = true; + set_uncached = false; + } + + if (remap) { + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); + + if (force_dma_unencrypted(dev)) + prot = pgprot_decrypted(prot); + /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); /* create a coherent mapping */ - ret = dma_common_contiguous_remap(page, size, - dma_pgprot(dev, PAGE_KERNEL, attrs), + ret = dma_common_contiguous_remap(page, size, prot, __builtin_return_address(0)); if (!ret) goto out_free_pages; - if (force_dma_unencrypted(dev)) { - err = set_memory_decrypted((unsigned long)ret, - 1 << get_order(size)); - if (err) - goto out_free_pages; - } - memset(ret, 0, size); - goto done; - } - - if (PageHighMem(page)) { - /* - * Depending on the cma= arguments and per-arch setup - * dma_alloc_contiguous could return highmem pages. - * Without remapping there is no way to return them here, - * so log an error and fail. - */ - dev_info(dev, "Rejecting highmem page from CMA.\n"); - goto out_free_pages; - } - - ret = page_address(page); - if (force_dma_unencrypted(dev)) { - err = set_memory_decrypted((unsigned long)ret, - 1 << get_order(size)); - if (err) - goto out_free_pages; + } else { + ret = page_address(page); + if (dma_set_decrypted(dev, ret, size)) + goto out_leak_pages; } memset(ret, 0, size); - if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !dev_is_dma_coherent(dev)) { + if (set_uncached) { arch_dma_prep_coherent(page, size); ret = arch_dma_set_uncached(ret, size); if (IS_ERR(ret)) goto out_encrypt_pages; } -done: + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: - if (force_dma_unencrypted(dev)) { - err = set_memory_encrypted((unsigned long)page_address(page), - 1 << get_order(size)); - /* If memory cannot be re-encrypted, it must be leaked */ - if (err) - return NULL; - } + if (dma_set_encrypted(dev, page_address(page), size)) + return NULL; out_free_pages: - dma_free_contiguous(dev, page, size); + __dma_direct_free_pages(dev, page, size); + return NULL; +out_leak_pages: return NULL; } @@ -247,33 +317,41 @@ void dma_direct_free(struct device *dev, size_t size, unsigned int page_order = get_order(size); if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { + !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ dma_free_contiguous(dev, cpu_addr, size); return; } - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !dev_is_dma_coherent(dev)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && + !dev_is_dma_coherent(dev) && + !is_swiotlb_for_alloc(dev)) { arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); return; } + if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + !dev_is_dma_coherent(dev)) { + if (!dma_release_from_global_coherent(page_order, cpu_addr)) + WARN_ON_ONCE(1); + return; + } + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) return; - if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) + if (is_vmalloc_addr(cpu_addr)) { vunmap(cpu_addr); - else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) - arch_dma_clear_uncached(cpu_addr, size); + } else { + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) + arch_dma_clear_uncached(cpu_addr, size); + if (dma_set_encrypted(dev, cpu_addr, size)) + return; + } - dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); + __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size); } struct page *dma_direct_alloc_pages(struct device *dev, size_t size, @@ -282,35 +360,20 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, struct page *page; void *ret; - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && - force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) + if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); - page = __dma_direct_alloc_pages(dev, size, gfp); + page = __dma_direct_alloc_pages(dev, size, gfp, false); if (!page) return NULL; - if (PageHighMem(page)) { - /* - * Depending on the cma= arguments and per-arch setup - * dma_alloc_contiguous could return highmem pages. - * Without remapping there is no way to return them here, - * so log an error and fail. - */ - dev_info(dev, "Rejecting highmem page from CMA.\n"); - goto out_free_pages; - } ret = page_address(page); - if (force_dma_unencrypted(dev)) { - if (set_memory_decrypted((unsigned long)ret, - 1 << get_order(size))) - goto out_free_pages; - } + if (dma_set_decrypted(dev, ret, size)) + goto out_leak_pages; memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; -out_free_pages: - dma_free_contiguous(dev, page, size); +out_leak_pages: return NULL; } @@ -318,7 +381,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir) { - unsigned int page_order = get_order(size); void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ @@ -326,10 +388,9 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)vaddr, 1 << page_order); - - dma_free_contiguous(dev, page, size); + if (dma_set_encrypted(dev, vaddr, size)) + return; + __dma_direct_free_pages(dev, page, size); } #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ @@ -343,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, sg->length, - dir, SYNC_FOR_DEVICE); + swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, @@ -369,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, - SYNC_FOR_CPU); + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, sg->length); @@ -381,29 +438,60 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } +/* + * Unmaps segments, except for ones marked as pci_p2pdma which do not + * require any further action as they contain a bus address. + */ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; int i; - for_each_sg(sgl, sg, nents, i) - dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, - attrs); + for_each_sg(sgl, sg, nents, i) { + if (sg_dma_is_bus_address(sg)) + sg_dma_unmark_bus_address(sg); + else + dma_direct_unmap_page(dev, sg->dma_address, + sg_dma_len(sg), dir, attrs); + } } #endif int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { - int i; + struct pci_p2pdma_map_state p2pdma_state = {}; + enum pci_p2pdma_map_type map; struct scatterlist *sg; + int i, ret; for_each_sg(sgl, sg, nents, i) { + if (is_pci_p2pdma_page(sg_page(sg))) { + map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg); + switch (map) { + case PCI_P2PDMA_MAP_BUS_ADDR: + continue; + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Any P2P mapping that traverses the PCI + * host bridge must be mapped with CPU physical + * address and not PCI bus addresses. This is + * done with dma_direct_map_page() below. + */ + break; + default: + ret = -EREMOTEIO; + goto out_unmap; + } + } + sg->dma_address = dma_direct_map_page(dev, sg_page(sg), sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) + if (sg->dma_address == DMA_MAPPING_ERROR) { + ret = -EIO; goto out_unmap; + } sg_dma_len(sg) = sg->length; } @@ -411,7 +499,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, out_unmap: dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return 0; + return ret; } dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, @@ -459,9 +547,13 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); + if (force_dma_unencrypted(dev)) + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; + if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) + return ret; if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff) return -ENXIO; @@ -488,15 +580,63 @@ int dma_direct_supported(struct device *dev, u64 mask) * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); + min_mask = min_t(u64, min_mask, zone_dma_limit); return mask >= phys_to_dma_unencrypted(dev, min_mask); } +static const struct bus_dma_region *dma_find_range(struct device *dev, + unsigned long start_pfn) +{ + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { + unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); + + if (start_pfn >= cpu_start_pfn && + start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) + return m; + } + + return NULL; +} + +/* + * To check whether all ram resource ranges are covered by dma range map + * Returns 0 when further check is needed + * Returns 1 if there is some RAM range can't be covered by dma_range_map + */ +static int check_ram_in_range_map(unsigned long start_pfn, + unsigned long nr_pages, void *data) +{ + unsigned long end_pfn = start_pfn + nr_pages; + struct device *dev = data; + + while (start_pfn < end_pfn) { + const struct bus_dma_region *bdr; + + bdr = dma_find_range(dev, start_pfn); + if (!bdr) + return 1; + + start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); + } + + return 0; +} + +bool dma_direct_all_ram_mapped(struct device *dev) +{ + if (!dev->dma_range_map) + return true; + return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, + check_ram_in_range_map); +} + size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ - if (is_swiotlb_active() && - (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE)) + if (is_swiotlb_active(dev) && + (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev))) return swiotlb_max_mapping_size(dev); return SIZE_MAX; } @@ -504,7 +644,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); + swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)); } /** @@ -542,7 +682,6 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, return -ENOMEM; map[0].cpu_start = cpu_start; map[0].dma_start = dma_start; - map[0].offset = offset; map[0].size = size; dev->dma_range_map = map; return 0; diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index b98615578737..d2c0b7e632fc 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -8,6 +8,7 @@ #define _KERNEL_DMA_DIRECT_H #include <linux/dma-direct.h> +#include <linux/memremap.h> int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, @@ -19,6 +20,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +bool dma_direct_all_ram_mapped(struct device *dev); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ @@ -56,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, { phys_addr_t paddr = dma_to_phys(dev, addr); - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); @@ -73,8 +74,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); @@ -87,11 +87,17 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); - if (unlikely(swiotlb_force == SWIOTLB_FORCE)) + if (is_swiotlb_force_bounce(dev)) { + if (is_pci_p2pdma_page(page)) + return DMA_MAPPING_ERROR; return swiotlb_map(dev, phys, size, dir, attrs); + } - if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - if (swiotlb_force != SWIOTLB_NO_FORCE) + if (unlikely(!dma_capable(dev, dma_addr, size, true)) || + dma_kmalloc_needs_bounce(dev, size, dir)) { + if (is_pci_p2pdma_page(page)) + return DMA_MAPPING_ERROR; + if (is_swiotlb_active(dev)) return swiotlb_map(dev, phys, size, dir, attrs); dev_WARN_ONCE(dev, 1, @@ -113,7 +119,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); - if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); + swiotlb_tbl_unmap_single(dev, phys, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index eacd4c5b10bf..92de80e5b057 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -17,12 +17,31 @@ static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page, { return DMA_MAPPING_ERROR; } +static void dma_dummy_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_page, so unmap_page should never be + * called. + */ + WARN_ON_ONCE(true); +} static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, unsigned long attrs) { - return 0; + return -EINVAL; +} + +static void dma_dummy_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_sg, so unmap_sg should never be called. + */ + WARN_ON_ONCE(true); } static int dma_dummy_supported(struct device *hwdev, u64 mask) @@ -33,6 +52,8 @@ static int dma_dummy_supported(struct device *hwdev, u64 mask) const struct dma_map_ops dma_dummy_ops = { .mmap = dma_dummy_mmap, .map_page = dma_dummy_map_page, + .unmap_page = dma_dummy_unmap_page, .map_sg = dma_dummy_map_sg, + .unmap_sg = dma_dummy_unmap_sg, .dma_supported = dma_dummy_supported, }; diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index da95df381483..cc19a3efea89 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020 Hisilicon Limited. + * Copyright (C) 2020 HiSilicon Limited. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -11,6 +11,7 @@ #include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/kthread.h> +#include <linux/map_benchmark.h> #include <linux/math64.h> #include <linux/module.h> #include <linux/pci.h> @@ -18,27 +19,6 @@ #include <linux/slab.h> #include <linux/timekeeping.h> -#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) -#define DMA_MAP_MAX_THREADS 1024 -#define DMA_MAP_MAX_SECONDS 300 - -#define DMA_MAP_BIDIRECTIONAL 0 -#define DMA_MAP_TO_DEVICE 1 -#define DMA_MAP_FROM_DEVICE 2 - -struct map_benchmark { - __u64 avg_map_100ns; /* average map latency in 100ns */ - __u64 map_stddev; /* standard deviation of map latency */ - __u64 avg_unmap_100ns; /* as above */ - __u64 unmap_stddev; - __u32 threads; /* how many threads will do map/unmap in parallel */ - __u32 seconds; /* how long the test will last */ - __s32 node; /* which numa node this benchmark will run on */ - __u32 dma_bits; /* DMA addressing capability */ - __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ -}; - struct map_benchmark_data { struct map_benchmark bparam; struct device *dev; @@ -56,9 +36,11 @@ static int map_benchmark_thread(void *data) void *buf; dma_addr_t dma_addr; struct map_benchmark_data *map = data; + int npages = map->bparam.granule; + u64 size = npages * PAGE_SIZE; int ret = 0; - buf = (void *)__get_free_page(GFP_KERNEL); + buf = alloc_pages_exact(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -74,10 +56,10 @@ static int map_benchmark_thread(void *data) * 66 means evertything goes well! 66 is lucky. */ if (map->dir != DMA_FROM_DEVICE) - memset(buf, 0x66, PAGE_SIZE); + memset(buf, 0x66, size); map_stime = ktime_get(); - dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir); + dma_addr = dma_map_single(map->dev, buf, size, map->dir); if (unlikely(dma_mapping_error(map->dev, dma_addr))) { pr_err("dma_map_single failed on %s\n", dev_name(map->dev)); @@ -87,8 +69,11 @@ static int map_benchmark_thread(void *data) map_etime = ktime_get(); map_delta = ktime_sub(map_etime, map_stime); + /* Pretend DMA is transmitting */ + ndelay(map->bparam.dma_trans_ns); + unmap_stime = ktime_get(); - dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); + dma_unmap_single(map->dev, dma_addr, size, map->dir); unmap_etime = ktime_get(); unmap_delta = ktime_sub(unmap_etime, unmap_stime); @@ -104,10 +89,26 @@ static int map_benchmark_thread(void *data) atomic64_add(map_sq, &map->sum_sq_map); atomic64_add(unmap_sq, &map->sum_sq_unmap); atomic64_inc(&map->loops); + + /* + * We may test for a long time so periodically check whether + * we need to schedule to avoid starving the others. Otherwise + * we may hangup the kernel in a non-preemptible kernel when + * the test kthreads number >= CPU number, the test kthreads + * will run endless on every CPU since the thread resposible + * for notifying the kthread stop (in do_map_benchmark()) + * could not be scheduled. + * + * Note this may degrade the test concurrency since the test + * threads may need to share the CPU time with other load + * in the system. So it's recommended to run this benchmark + * on an idle system. + */ + cond_resched(); } out: - free_page((unsigned long)buf); + free_pages_exact(buf, size); return ret; } @@ -116,7 +117,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) struct task_struct **tsk; int threads = map->bparam.threads; int node = map->bparam.node; - const cpumask_t *cpu_mask = cpumask_of_node(node); u64 loops; int ret = 0; int i; @@ -133,11 +133,13 @@ static int do_map_benchmark(struct map_benchmark_data *map) if (IS_ERR(tsk[i])) { pr_err("create dma_map thread failed\n"); ret = PTR_ERR(tsk[i]); + while (--i >= 0) + kthread_stop(tsk[i]); goto out; } if (node != NUMA_NO_NODE) - kthread_bind_mask(tsk[i], cpu_mask); + kthread_bind_mask(tsk[i], cpumask_of_node(node)); } /* clear the old value in the previous benchmark */ @@ -154,13 +156,17 @@ static int do_map_benchmark(struct map_benchmark_data *map) msleep_interruptible(map->bparam.seconds * 1000); - /* wait for the completion of benchmark threads */ + /* wait for the completion of all started benchmark threads */ for (i = 0; i < threads; i++) { - ret = kthread_stop(tsk[i]); - if (ret) - goto out; + int kthread_ret = kthread_stop_put(tsk[i]); + + if (kthread_ret) + ret = kthread_ret; } + if (ret) + goto out; + loops = atomic64_read(&map->loops); if (likely(loops > 0)) { u64 map_variance, unmap_variance; @@ -185,8 +191,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) } out: - for (i = 0; i < threads; i++) - put_task_struct(tsk[i]); put_device(map->dev); kfree(tsk); return ret; @@ -198,7 +202,6 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, struct map_benchmark_data *map = file->private_data; void __user *argp = (void __user *)arg; u64 old_dma_mask; - int ret; if (copy_from_user(&map->bparam, argp, sizeof(map->bparam))) @@ -218,12 +221,23 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } + if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) { + pr_err("invalid transmission delay\n"); + return -EINVAL; + } + if (map->bparam.node != NUMA_NO_NODE && - !node_possible(map->bparam.node)) { + (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES || + !node_possible(map->bparam.node))) { pr_err("invalid numa node\n"); return -EINVAL; } + if (map->bparam.granule < 1 || map->bparam.granule > 1024) { + pr_err("invalid granule size\n"); + return -EINVAL; + } + switch (map->bparam.dma_dir) { case DMA_MAP_BIDIRECTIONAL: map->dir = DMA_BIDIRECTIONAL; @@ -258,6 +272,9 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, * dma_mask changed by benchmark */ dma_set_mask(map->dev, old_dma_mask); + + if (ret) + return ret; break; default: return -EINVAL; @@ -362,4 +379,3 @@ module_exit(map_benchmark_cleanup); MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>"); MODULE_DESCRIPTION("dma_map benchmark driver"); -MODULE_LICENSE("GPL"); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f87a89d08654..cda127027e48 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -10,12 +10,23 @@ #include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/gfp.h> +#include <linux/iommu-dma.h> +#include <linux/kmsan.h> #include <linux/of_device.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include "debug.h" #include "direct.h" +#define CREATE_TRACE_POINTS +#include <trace/events/dma.h> + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +bool dma_default_coherent = IS_ENABLED(CONFIG_ARCH_DMA_DEFAULT_COHERENT); +#endif + /* * Managed DMA API */ @@ -60,8 +71,8 @@ void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, { struct dma_devres match_data = { size, vaddr, dma_handle }; - dma_free_coherent(dev, size, vaddr, dma_handle); WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data)); + dma_free_coherent(dev, size, vaddr, dma_handle); } EXPORT_SYMBOL(dmam_free_coherent); @@ -109,8 +120,12 @@ EXPORT_SYMBOL(dmam_alloc_attrs); static bool dma_go_direct(struct device *dev, dma_addr_t mask, const struct dma_map_ops *ops) { + if (use_dma_iommu(dev)) + return false; + if (likely(!ops)) return true; + #ifdef CONFIG_DMA_OPS_BYPASS if (dev->dma_ops_bypass) return min_not_zero(mask, dev->bus_dma_limit) >= @@ -152,9 +167,14 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); - debug_dma_map_page(dev, page, offset, size, dir, addr); + kmsan_handle_dma(page, offset, size, dir); + trace_dma_map_page(dev, page_to_phys(page) + offset, addr, size, dir, + attrs); + debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); return addr; } @@ -169,18 +189,17 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) + else if (use_dma_iommu(dev)) + iommu_dma_unmap_page(dev, addr, size, dir, attrs); + else ops->unmap_page(dev, addr, size, dir, attrs); + trace_dma_unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_page_attrs); -/* - * dma_maps_sg_attrs returns 0 on error and > 0 on success. - * It should never return a value < 0. - */ -int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) +static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); int ents; @@ -193,15 +212,93 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + ents = iommu_dma_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); - BUG_ON(ents < 0); - debug_dma_map_sg(dev, sg, nents, ents, dir); + + if (ents > 0) { + kmsan_handle_dma_sg(sg, nents, dir); + trace_dma_map_sg(dev, sg, nents, ents, dir, attrs); + debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); + } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && + ents != -EIO && ents != -EREMOTEIO)) { + trace_dma_map_sg_err(dev, sg, nents, ents, dir, attrs); + return -EIO; + } return ents; } + +/** + * dma_map_sg_attrs - Map the given buffer for DMA + * @dev: The device for which to perform the DMA operation + * @sg: The sg_table object describing the buffer + * @nents: Number of entries to map + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Maps a buffer described by a scatterlist passed in the sg argument with + * nents segments for the @dir DMA operation by the @dev device. + * + * Returns the number of mapped entries (which can be less than nents) + * on success. Zero is returned for any error. + * + * dma_unmap_sg_attrs() should be used to unmap the buffer with the + * original sg and original nents (not the value returned by this funciton). + */ +unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + int ret; + + ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs); + if (ret < 0) + return 0; + return ret; +} EXPORT_SYMBOL(dma_map_sg_attrs); +/** + * dma_map_sgtable - Map the given buffer for DMA + * @dev: The device for which to perform the DMA operation + * @sgt: The sg_table object describing the buffer + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Maps a buffer described by a scatterlist stored in the given sg_table + * object for the @dir DMA operation by the @dev device. After success, the + * ownership for the buffer is transferred to the DMA domain. One has to + * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the + * ownership of the buffer back to the CPU domain before touching the + * buffer by the CPU. + * + * Returns 0 on success or a negative error code on error. The following + * error codes are supported with the given meaning: + * + * -EINVAL An invalid argument, unaligned access or other error + * in usage. Will not succeed if retried. + * -ENOMEM Insufficient resources (like memory or IOVA space) to + * complete the mapping. Should succeed if retried later. + * -EIO Legacy error code with an unknown meaning. eg. this is + * returned if a lower level call returned + * DMA_MAPPING_ERROR. + * -EREMOTEIO The DMA device cannot access P2PDMA memory specified + * in the sg_table. This will not succeed if retried. + */ +int dma_map_sgtable(struct device *dev, struct sg_table *sgt, + enum dma_data_direction dir, unsigned long attrs) +{ + int nents; + + nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs); + if (nents < 0) + return nents; + sgt->nents = nents; + return 0; +} +EXPORT_SYMBOL_GPL(dma_map_sgtable); + void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) @@ -209,10 +306,13 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); + trace_dma_unmap_sg(dev, sg, nents, dir, attrs); debug_dma_unmap_sg(dev, sg, nents, dir); if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } @@ -229,16 +329,15 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; - /* Don't allow RAM to be mapped */ - if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) - return DMA_MAPPING_ERROR; - if (dma_map_direct(dev, ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_resource(dev, phys_addr, size, dir, attrs); else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); - debug_dma_map_resource(dev, phys_addr, size, dir, addr); + trace_dma_map_resource(dev, phys_addr, addr, size, dir, attrs); + debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs); return addr; } EXPORT_SYMBOL(dma_map_resource); @@ -249,13 +348,19 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (!dma_map_direct(dev, ops) && ops->unmap_resource) + if (dma_map_direct(dev, ops)) + ; /* nothing to do: uncached and no swiotlb */ + else if (use_dma_iommu(dev)) + iommu_dma_unmap_resource(dev, addr, size, dir, attrs); + else if (ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); + trace_dma_unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_resource); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, +#ifdef CONFIG_DMA_NEED_SYNC +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -263,13 +368,16 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); + trace_dma_sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_cpu); +EXPORT_SYMBOL(__dma_sync_single_for_cpu); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -277,13 +385,16 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); + trace_dma_sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_device); +EXPORT_SYMBOL(__dma_sync_single_for_device); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -291,13 +402,16 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); + trace_dma_sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); +EXPORT_SYMBOL(__dma_sync_sg_for_cpu); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -305,11 +419,54 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); + trace_dma_sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_device); +EXPORT_SYMBOL(__dma_sync_sg_for_device); + +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + /* + * dma_skip_sync could've been reset on first SWIOTLB buffer + * mapping, but @dma_addr is not necessary an SWIOTLB buffer. + * In this case, fall back to more granular check. + */ + return dma_direct_need_sync(dev, dma_addr); + return true; +} +EXPORT_SYMBOL_GPL(__dma_need_sync); + +static void dma_setup_need_sync(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops) || use_dma_iommu(dev)) + /* + * dma_skip_sync will be reset to %false on first SWIOTLB buffer + * mapping, if any. During the device initialization, it's + * enough to check only for the DMA coherence. + */ + dev->dma_skip_sync = dev_is_dma_coherent(dev); + else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu && + !ops->sync_sg_for_device && !ops->sync_sg_for_cpu) + /* + * Synchronization is not possible when none of DMA sync ops + * is set. + */ + dev->dma_skip_sync = true; + else + dev->dma_skip_sync = false; +} +#else /* !CONFIG_DMA_NEED_SYNC */ +static inline void dma_setup_need_sync(struct device *dev) { } +#endif /* !CONFIG_DMA_NEED_SYNC */ /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems @@ -331,6 +488,9 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, if (dma_alloc_direct(dev, ops)) return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); if (!ops->get_sgtable) return -ENXIO; return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); @@ -344,8 +504,6 @@ EXPORT_SYMBOL(dma_get_sgtable_attrs); */ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { - if (force_dma_unencrypted(dev)) - prot = pgprot_decrypted(prot); if (dev_is_dma_coherent(dev)) return prot; #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE @@ -369,6 +527,8 @@ bool dma_can_mmap(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_can_mmap(dev); + if (use_dma_iommu(dev)) + return true; return ops->mmap != NULL; } EXPORT_SYMBOL_GPL(dma_can_mmap); @@ -395,6 +555,9 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_alloc_direct(dev, ops)) return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); if (!ops->mmap) return -ENXIO; return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -407,6 +570,10 @@ u64 dma_get_required_mask(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_get_required_mask(dev); + + if (use_dma_iommu(dev)) + return DMA_BIT_MASK(32); + if (ops->get_required_mask) return ops->get_required_mask(dev); @@ -430,20 +597,38 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, WARN_ON_ONCE(!dev->coherent_dma_mask); - if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) + /* + * DMA allocations can never be turned back into a page pointer, so + * requesting compound pages doesn't make sense (and can't even be + * supported at all by various backends). + */ + if (WARN_ON_ONCE(flag & __GFP_COMP)) + return NULL; + + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) { + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, + DMA_BIDIRECTIONAL, flag, attrs); return cpu_addr; + } /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (dma_alloc_direct(dev, ops)) + if (dma_alloc_direct(dev, ops)) { cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); - else if (ops->alloc) + } else if (use_dma_iommu(dev)) { + cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); + } else if (ops->alloc) { cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); - else + } else { + trace_dma_alloc(dev, NULL, 0, size, DMA_BIDIRECTIONAL, flag, + attrs); return NULL; + } - debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, DMA_BIDIRECTIONAL, + flag, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); return cpu_addr; } EXPORT_SYMBOL(dma_alloc_attrs); @@ -464,118 +649,231 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, */ WARN_ON(irqs_disabled()); + trace_dma_free(dev, cpu_addr, dma_handle, size, DMA_BIDIRECTIONAL, + attrs); if (!cpu_addr) return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); else if (ops->free) ops->free(dev, size, cpu_addr, dma_handle, attrs); } EXPORT_SYMBOL(dma_free_attrs); -struct page *dma_alloc_pages(struct device *dev, size_t size, +static struct page *__dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { const struct dma_map_ops *ops = get_dma_ops(dev); - struct page *page; if (WARN_ON_ONCE(!dev->coherent_dma_mask)) return NULL; if (WARN_ON_ONCE(gfp & (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) return NULL; + if (WARN_ON_ONCE(gfp & __GFP_COMP)) + return NULL; size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) - page = dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); - else if (ops->alloc_pages) - page = ops->alloc_pages(dev, size, dma_handle, dir, gfp); - else + return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); + if (use_dma_iommu(dev)) + return dma_common_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!ops->alloc_pages_op) return NULL; + return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); +} - debug_dma_map_page(dev, page, 0, size, dir, *dma_handle); +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); + if (page) { + trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle, + size, dir, gfp, 0); + debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); + } else { + trace_dma_alloc_pages(dev, NULL, 0, size, dir, gfp, 0); + } return page; } EXPORT_SYMBOL_GPL(dma_alloc_pages); -void dma_free_pages(struct device *dev, size_t size, struct page *page, +static void __dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); size = PAGE_ALIGN(size); - debug_dma_unmap_page(dev, dma_handle, size, dir); - if (dma_alloc_direct(dev, ops)) dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (use_dma_iommu(dev)) + dma_common_free_pages(dev, size, page, dma_handle, dir); else if (ops->free_pages) ops->free_pages(dev, size, page, dma_handle, dir); } + +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + trace_dma_free_pages(dev, page_to_virt(page), dma_handle, size, dir, 0); + debug_dma_unmap_page(dev, dma_handle, size, dir); + __dma_free_pages(dev, size, page, dma_handle, dir); +} EXPORT_SYMBOL_GPL(dma_free_pages); -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma, + size_t size, struct page *page) { - const struct dma_map_ops *ops = get_dma_ops(dev); - void *vaddr; + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (vma->vm_pgoff >= count || vma_pages(vma) > count - vma->vm_pgoff) + return -ENXIO; + return remap_pfn_range(vma, vma->vm_start, + page_to_pfn(page) + vma->vm_pgoff, + vma_pages(vma) << PAGE_SHIFT, vma->vm_page_prot); +} +EXPORT_SYMBOL_GPL(dma_mmap_pages); + +static struct sg_table *alloc_single_sgt(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp) +{ + struct sg_table *sgt; + struct page *page; - if (!ops || !ops->alloc_noncoherent) { - struct page *page; + sgt = kmalloc(sizeof(*sgt), gfp); + if (!sgt) + return NULL; + if (sg_alloc_table(sgt, 1, gfp)) + goto out_free_sgt; + page = __dma_alloc_pages(dev, size, &sgt->sgl->dma_address, dir, gfp); + if (!page) + goto out_free_table; + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + sg_dma_len(sgt->sgl) = sgt->sgl->length; + return sgt; +out_free_table: + sg_free_table(sgt); +out_free_sgt: + kfree(sgt); + return NULL; +} + +struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + struct sg_table *sgt; + + if (WARN_ON_ONCE(attrs & ~DMA_ATTR_ALLOC_SINGLE_PAGES)) + return NULL; + if (WARN_ON_ONCE(gfp & __GFP_COMP)) + return NULL; - page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!page) - return NULL; - return page_address(page); + if (use_dma_iommu(dev)) + sgt = iommu_dma_alloc_noncontiguous(dev, size, dir, gfp, attrs); + else + sgt = alloc_single_sgt(dev, size, dir, gfp); + + if (sgt) { + sgt->nents = 1; + trace_dma_alloc_sgt(dev, sgt, size, dir, gfp, attrs); + debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); + } else { + trace_dma_alloc_sgt_err(dev, NULL, 0, size, dir, gfp, attrs); } + return sgt; +} +EXPORT_SYMBOL_GPL(dma_alloc_noncontiguous); - size = PAGE_ALIGN(size); - vaddr = ops->alloc_noncoherent(dev, size, dma_handle, dir, gfp); - if (vaddr) - debug_dma_map_page(dev, virt_to_page(vaddr), 0, size, dir, - *dma_handle); - return vaddr; +static void free_single_sgt(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + __dma_free_pages(dev, size, sg_page(sgt->sgl), sgt->sgl->dma_address, + dir); + sg_free_table(sgt); + kfree(sgt); } -EXPORT_SYMBOL_GPL(dma_alloc_noncoherent); -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir) +void dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + trace_dma_free_sgt(dev, sgt, size, dir); + debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); + + if (use_dma_iommu(dev)) + iommu_dma_free_noncontiguous(dev, size, sgt, dir); + else + free_single_sgt(dev, size, sgt, dir); +} +EXPORT_SYMBOL_GPL(dma_free_noncontiguous); + +void *dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt) +{ + + if (use_dma_iommu(dev)) + return iommu_dma_vmap_noncontiguous(dev, size, sgt); + + return page_address(sg_page(sgt->sgl)); +} +EXPORT_SYMBOL_GPL(dma_vmap_noncontiguous); + +void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) +{ + if (use_dma_iommu(dev)) + iommu_dma_vunmap_noncontiguous(dev, vaddr); +} +EXPORT_SYMBOL_GPL(dma_vunmap_noncontiguous); + +int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt) +{ + if (use_dma_iommu(dev)) + return iommu_dma_mmap_noncontiguous(dev, vma, size, sgt); + return dma_mmap_pages(dev, vma, size, sg_page(sgt->sgl)); +} +EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous); + +static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (!ops || !ops->free_noncoherent) { - dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); - return; + if (use_dma_iommu(dev)) { + if (WARN_ON(ops)) + return false; + return true; } - size = PAGE_ALIGN(size); - debug_dma_unmap_page(dev, dma_handle, size, dir); - ops->free_noncoherent(dev, size, vaddr, dma_handle, dir); + /* + * ->dma_supported sets and clears the bypass flag, so ignore it here + * and always call into the method if there is one. + */ + if (ops) { + if (!ops->dma_supported) + return true; + return ops->dma_supported(dev, mask); + } + + return dma_direct_supported(dev, mask); } -EXPORT_SYMBOL_GPL(dma_free_noncoherent); -int dma_supported(struct device *dev, u64 mask) +bool dma_pci_p2pdma_supported(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); /* - * ->dma_supported sets the bypass flag, so we must always call - * into the method here unless the device is truly direct mapped. + * Note: dma_ops_bypass is not checked here because P2PDMA should + * not be used with dma mapping ops that do not have support even + * if the specific device is bypassing them. */ - if (!ops) - return dma_direct_supported(dev, mask); - if (!ops->dma_supported) - return 1; - return ops->dma_supported(dev, mask); -} -EXPORT_SYMBOL(dma_supported); -#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK -void arch_dma_set_mask(struct device *dev, u64 mask); -#else -#define arch_dma_set_mask(dev, mask) do { } while (0) -#endif + /* if ops is not set, dma direct and default IOMMU support P2PDMA */ + return !ops; +} +EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); int dma_set_mask(struct device *dev, u64 mask) { @@ -590,11 +888,12 @@ int dma_set_mask(struct device *dev, u64 mask) arch_dma_set_mask(dev, mask); *dev->dma_mask = mask; + dma_setup_need_sync(dev); + return 0; } EXPORT_SYMBOL(dma_set_mask); -#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK int dma_set_coherent_mask(struct device *dev, u64 mask) { /* @@ -610,7 +909,28 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) return 0; } EXPORT_SYMBOL(dma_set_coherent_mask); -#endif + +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the devices DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. + */ +bool dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops) || use_dma_iommu(dev)) + return false; + return !dma_direct_all_ram_mapped(dev); +} +EXPORT_SYMBOL_GPL(dma_addressing_limited); size_t dma_max_mapping_size(struct device *dev) { @@ -619,6 +939,8 @@ size_t dma_max_mapping_size(struct device *dev) if (dma_map_direct(dev, ops)) size = dma_direct_max_mapping_size(dev); + else if (use_dma_iommu(dev)) + size = iommu_dma_max_mapping_size(dev); else if (ops && ops->max_mapping_size) size = ops->max_mapping_size(dev); @@ -626,20 +948,27 @@ size_t dma_max_mapping_size(struct device *dev) } EXPORT_SYMBOL_GPL(dma_max_mapping_size); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +size_t dma_opt_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + size_t size = SIZE_MAX; - if (dma_map_direct(dev, ops)) - return dma_direct_need_sync(dev, dma_addr); - return ops->sync_single_for_cpu || ops->sync_single_for_device; + if (use_dma_iommu(dev)) + size = iommu_dma_opt_mapping_size(); + else if (ops && ops->opt_mapping_size) + size = ops->opt_mapping_size(); + + return min(dma_max_mapping_size(dev), size); } -EXPORT_SYMBOL_GPL(dma_need_sync); +EXPORT_SYMBOL_GPL(dma_opt_mapping_size); unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (use_dma_iommu(dev)) + return iommu_dma_get_merge_boundary(dev); + if (!ops || !ops->get_merge_boundary) return 0; /* can't merge */ diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 910ae69cae77..9afd569eadb9 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -4,6 +4,14 @@ * the allocated memory contains normal pages in the direct kernel mapping. */ #include <linux/dma-map-ops.h> +#include <linux/iommu-dma.h> + +static struct page *dma_common_vaddr_to_page(void *cpu_addr) +{ + if (is_vmalloc_addr(cpu_addr)) + return vmalloc_to_page(cpu_addr); + return virt_to_page(cpu_addr); +} /* * Create scatter-list for the already allocated DMA buffer. @@ -12,7 +20,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret; ret = sg_alloc_table(sgt, 1, GFP_KERNEL); @@ -32,6 +40,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); @@ -43,7 +52,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + page_to_pfn(page) + vma->vm_pgoff, user_count << PAGE_SHIFT, vma->vm_page_prot); #else return -ENXIO; @@ -62,8 +71,12 @@ struct page *dma_common_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; - *dma_handle = ops->map_page(dev, page, 0, size, dir, - DMA_ATTR_SKIP_CPU_SYNC); + if (use_dma_iommu(dev)) + *dma_handle = iommu_dma_map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else + *dma_handle = ops->map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); if (*dma_handle == DMA_MAPPING_ERROR) { dma_free_contiguous(dev, page, size); return NULL; @@ -78,7 +91,10 @@ void dma_common_free_pages(struct device *dev, size_t size, struct page *page, { const struct dma_map_ops *ops = get_dma_ops(dev); - if (ops->unmap_page) + if (use_dma_iommu(dev)) + iommu_dma_unmap_page(dev, dma_handle, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else if (ops->unmap_page) ops->unmap_page(dev, dma_handle, size, dir, DMA_ATTR_SKIP_CPU_SYNC); dma_free_contiguous(dev, page, size); diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 5f84e6cdb78e..7b04f7575796 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -70,9 +70,9 @@ static bool cma_in_zone(gfp_t gfp) /* CMA can't cross zone boundaries, see cma_activate_area() */ end = cma_get_base(cma) + size - 1; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) - return end <= DMA_BIT_MASK(zone_dma_bits); + return end <= zone_dma_limit; if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) - return end <= DMA_BIT_MASK(32); + return end <= max(DMA_BIT_MASK(32), zone_dma_limit); return true; } @@ -84,8 +84,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, void *addr; int ret = -ENOMEM; - /* Cannot allocate larger than MAX_ORDER-1 */ - order = min(get_order(pool_size), MAX_ORDER-1); + /* Cannot allocate larger than MAX_PAGE_ORDER */ + order = min(get_order(pool_size), MAX_PAGE_ORDER); do { pool_size = 1 << (PAGE_SHIFT + order); @@ -135,9 +135,9 @@ encrypt_mapping: remove_mapping: #ifdef CONFIG_DMA_DIRECT_REMAP dma_common_free_remap(addr, pool_size); -#endif -free_page: __maybe_unused +free_page: __free_pages(page, order); +#endif out: return ret; } @@ -190,7 +190,7 @@ static int __init dma_atomic_pool_init(void) /* * If coherent_pool was not used on the command line, default the pool - * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1. + * sizes to 128KB per 1GB of memory, min 128KB, max MAX_PAGE_ORDER. */ if (!atomic_pool_size) { unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); @@ -203,7 +203,7 @@ static int __init dma_atomic_pool_init(void) GFP_KERNEL); if (!atomic_pool_kernel) ret = -ENOMEM; - if (IS_ENABLED(CONFIG_ZONE_DMA)) { + if (has_managed_dma()) { atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, GFP_KERNEL | GFP_DMA); if (!atomic_pool_dma) @@ -226,7 +226,7 @@ static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) if (prev == NULL) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) return atomic_pool_dma32; - if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) + if (atomic_pool_dma && (gfp & GFP_DMA)) return atomic_pool_dma; return atomic_pool_kernel; } diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 905c3fa005f1..9e2afad1c615 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -10,8 +10,10 @@ struct page **dma_common_find_pages(void *cpu_addr) { struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || area->flags != VM_DMA_COHERENT) + if (!area || !(area->flags & VM_DMA_COHERENT)) return NULL; + WARN(area->flags != VM_DMA_COHERENT, + "unexpected flags in area: %p\n", cpu_addr); return area->pages; } @@ -43,13 +45,13 @@ void *dma_common_contiguous_remap(struct page *page, size_t size, void *vaddr; int i; - pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL); + pages = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); if (!pages) return NULL; for (i = 0; i < count; i++) pages[i] = nth_page(page, i); vaddr = vmap(pages, count, VM_DMA_COHERENT, prot); - kfree(pages); + kvfree(pages); return vaddr; } @@ -61,11 +63,10 @@ void dma_common_free_remap(void *cpu_addr, size_t size) { struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || area->flags != VM_DMA_COHERENT) { + if (!area || !(area->flags & VM_DMA_COHERENT)) { WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } - unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size)); vunmap(cpu_addr); } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 7c42df6e6100..abcf3fa63a56 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -21,38 +21,37 @@ #define pr_fmt(fmt) "software IO TLB: " fmt #include <linux/cache.h> +#include <linux/cc_platform.h> +#include <linux/ctype.h> +#include <linux/debugfs.h> #include <linux/dma-direct.h> #include <linux/dma-map-ops.h> -#include <linux/mm.h> #include <linux/export.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <linux/iommu-helper.h> +#include <linux/init.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/pfn.h> +#include <linux/rculist.h> +#include <linux/scatterlist.h> +#include <linux/set_memory.h> #include <linux/spinlock.h> #include <linux/string.h> #include <linux/swiotlb.h> -#include <linux/pfn.h> #include <linux/types.h> -#include <linux/ctype.h> -#include <linux/highmem.h> -#include <linux/gfp.h> -#include <linux/scatterlist.h> -#include <linux/mem_encrypt.h> -#include <linux/set_memory.h> -#ifdef CONFIG_DEBUG_FS -#include <linux/debugfs.h> +#ifdef CONFIG_DMA_RESTRICTED_POOL +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/of_reserved_mem.h> +#include <linux/slab.h> #endif -#include <asm/io.h> -#include <asm/dma.h> - -#include <linux/init.h> -#include <linux/memblock.h> -#include <linux/iommu-helper.h> - #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h> -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* @@ -62,134 +61,192 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -enum swiotlb_force swiotlb_force; +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -/* - * Used to do a quick range check in swiotlb_tbl_unmap_single and - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this - * API. +/** + * struct io_tlb_slot - IO TLB slot descriptor + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @list: The free list describing the number of free entries available + * from each index. + * @pad_slots: Number of preceding padding slots. Valid only in the first + * allocated non-padding slot. */ -phys_addr_t io_tlb_start, io_tlb_end; +struct io_tlb_slot { + phys_addr_t orig_addr; + size_t alloc_size; + unsigned short list; + unsigned short pad_slots; +}; -/* - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; +static bool swiotlb_force_bounce; +static bool swiotlb_force_disable; -/* - * The number of used IO TLB block - */ -static unsigned long io_tlb_used; +#ifdef CONFIG_SWIOTLB_DYNAMIC -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; +static void swiotlb_dyn_alloc(struct work_struct *work); -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). +static struct io_tlb_mem io_tlb_default_mem = { + .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock), + .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools), + .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc, + swiotlb_dyn_alloc), +}; + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static struct io_tlb_mem io_tlb_default_mem; + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; +static unsigned long default_nareas; + +/** + * struct io_tlb_area - IO TLB memory area descriptor + * + * This is a single area with a single lock. + * + * @used: The number of used IO TLB block. + * @index: The slot index to start searching in this area for next round. + * @lock: The lock to protect the above data structures in the map and + * unmap calls. */ -static unsigned int max_segment; +struct io_tlb_area { + unsigned long used; + unsigned int index; + spinlock_t lock; +}; /* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. + * Round up number of slabs to the next power of 2. The last area is going + * be smaller than the rest if default_nslabs is not power of two. + * The number of slot in an area should be a multiple of IO_TLB_SEGSIZE, + * otherwise a segment may span two or more areas. It conflicts with free + * contiguous slots tracking: free slots are treated contiguous no matter + * whether they cross an area boundary. + * + * Return true if default_nslabs is rounded up. */ -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -static phys_addr_t *io_tlb_orig_addr; +static bool round_up_default_nslabs(void) +{ + if (!default_nareas) + return false; + + if (default_nslabs < IO_TLB_SEGSIZE * default_nareas) + default_nslabs = IO_TLB_SEGSIZE * default_nareas; + else if (is_power_of_2(default_nslabs)) + return false; + default_nslabs = roundup_pow_of_two(default_nslabs); + return true; +} -/* - * Protect the above data structures in the map and unmap calls +/** + * swiotlb_adjust_nareas() - adjust the number of areas and slots + * @nareas: Desired number of areas. Zero is treated as 1. + * + * Adjust the default number of areas in a memory pool. + * The default size of the memory pool may also change to meet minimum area + * size requirements. */ -static DEFINE_SPINLOCK(io_tlb_lock); +static void swiotlb_adjust_nareas(unsigned int nareas) +{ + if (!nareas) + nareas = 1; + else if (!is_power_of_2(nareas)) + nareas = roundup_pow_of_two(nareas); -static int late_alloc; + default_nareas = nareas; + + pr_info("area num %d.\n", nareas); + if (round_up_default_nslabs()) + pr_info("SWIOTLB bounce buffer size roundup to %luMB", + (default_nslabs << IO_TLB_SHIFT) >> 20); +} + +/** + * limit_nareas() - get the maximum number of areas for a given memory pool size + * @nareas: Desired number of areas. + * @nslots: Total number of slots in the memory pool. + * + * Limit the number of areas to the maximum possible number of areas in + * a memory pool of the given size. + * + * Return: Maximum possible number of areas. + */ +static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) +{ + if (nslots < nareas * IO_TLB_SEGSIZE) + return nslots / IO_TLB_SEGSIZE; + return nareas; +} static int __init setup_io_tlb_npages(char *str) { if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + default_nslabs = + ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE); } if (*str == ',') ++str; - if (!strcmp(str, "force")) { - swiotlb_force = SWIOTLB_FORCE; - } else if (!strcmp(str, "noforce")) { - swiotlb_force = SWIOTLB_NO_FORCE; - io_tlb_nslabs = 1; - } + if (isdigit(*str)) + swiotlb_adjust_nareas(simple_strtoul(str, &str, 0)); + if (*str == ',') + ++str; + if (!strcmp(str, "force")) + swiotlb_force_bounce = true; + else if (!strcmp(str, "noforce")) + swiotlb_force_disable = true; return 0; } early_param("swiotlb", setup_io_tlb_npages); -static bool no_iotlb_memory; - -unsigned long swiotlb_nr_tbl(void) -{ - return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs; -} -EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); - -unsigned int swiotlb_max_segment(void) -{ - return unlikely(no_iotlb_memory) ? 0 : max_segment; -} -EXPORT_SYMBOL_GPL(swiotlb_max_segment); - -void swiotlb_set_max_segment(unsigned int val) -{ - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - unsigned long swiotlb_size_or_default(void) { - unsigned long size; - - size = io_tlb_nslabs << IO_TLB_SHIFT; - - return size ? size : (IO_TLB_DEFAULT_SIZE); + return default_nslabs << IO_TLB_SHIFT; } -void __init swiotlb_adjust_size(unsigned long new_size) +void __init swiotlb_adjust_size(unsigned long size) { - unsigned long size; - /* * If swiotlb parameter has not been specified, give a chance to * architectures such as those supporting memory encryption to * adjust/expand SWIOTLB size for their use. */ - if (!io_tlb_nslabs) { - size = ALIGN(new_size, 1 << IO_TLB_SHIFT); - io_tlb_nslabs = size >> IO_TLB_SHIFT; - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT) + return; - pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); - } + size = ALIGN(size, IO_TLB_SIZE); + default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); + if (round_up_default_nslabs()) + size = default_nslabs << IO_TLB_SHIFT; + pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20); } void swiotlb_print_info(void) { - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; - if (no_iotlb_memory) { + if (!mem->nslabs) { pr_warn("No low mem\n"); return; } - pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end, - bytes >> 20); + pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end, + (mem->nslabs << IO_TLB_SHIFT) >> 20); +} + +static inline unsigned long io_tlb_offset(unsigned long val) +{ + return val & (IO_TLB_SEGSIZE - 1); +} + +static inline unsigned long nr_slots(u64 val) +{ + return DIV_ROUND_UP(val, IO_TLB_SIZE); } /* @@ -200,90 +257,169 @@ void swiotlb_print_info(void) */ void __init swiotlb_update_mem_attributes(void) { - void *vaddr; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; unsigned long bytes; - if (no_iotlb_memory || late_alloc) + if (!mem->nslabs || mem->late_alloc) return; + bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); + set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); +} + +static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, + unsigned long nslabs, bool late_alloc, unsigned int nareas) +{ + void *vaddr = phys_to_virt(start); + unsigned long bytes = nslabs << IO_TLB_SHIFT, i; + + mem->nslabs = nslabs; + mem->start = start; + mem->end = mem->start + bytes; + mem->late_alloc = late_alloc; + mem->nareas = nareas; + mem->area_nslabs = nslabs / mem->nareas; + + for (i = 0; i < mem->nareas; i++) { + spin_lock_init(&mem->areas[i].lock); + mem->areas[i].index = 0; + mem->areas[i].used = 0; + } + + for (i = 0; i < mem->nslabs; i++) { + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; + } - vaddr = phys_to_virt(io_tlb_start); - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); + mem->vaddr = vaddr; + return; } -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +/** + * add_mem_pool() - add a memory pool to the allocator + * @mem: Software IO TLB allocator. + * @pool: Memory pool to be added. + */ +static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool) { - unsigned long i, bytes; - size_t alloc_size; - - bytes = nslabs << IO_TLB_SHIFT; +#ifdef CONFIG_SWIOTLB_DYNAMIC + spin_lock(&mem->lock); + list_add_rcu(&pool->node, &mem->pools); + mem->nslabs += pool->nslabs; + spin_unlock(&mem->lock); +#else + mem->nslabs = pool->nslabs; +#endif +} - io_tlb_nslabs = nslabs; - io_tlb_start = __pa(tlb); - io_tlb_end = io_tlb_start + bytes; +static void __init *swiotlb_memblock_alloc(unsigned long nslabs, + unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)) +{ + size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); + void *tlb; /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. + * By default allocate the bounce buffer memory from low memory, but + * allow to pick a location everywhere for hypervisors with guest + * memory encryption. */ - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int)); - io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE); - if (!io_tlb_list) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); - - alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)); - io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE); - if (!io_tlb_orig_addr) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, alloc_size, PAGE_SIZE); - - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - no_iotlb_memory = false; - - if (verbose) - swiotlb_print_info(); + if (flags & SWIOTLB_ANY) + tlb = memblock_alloc(bytes, PAGE_SIZE); + else + tlb = memblock_alloc_low(bytes, PAGE_SIZE); - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - return 0; + if (!tlb) { + pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", + __func__, bytes); + return NULL; + } + + if (remap && remap(tlb, nslabs) < 0) { + memblock_free(tlb, PAGE_ALIGN(bytes)); + pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); + return NULL; + } + + return tlb; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. */ -void __init -swiotlb_init(int verbose) +void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)) { - size_t default_size = IO_TLB_DEFAULT_SIZE; - unsigned char *vstart; - unsigned long bytes; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; + unsigned long nslabs; + unsigned int nareas; + size_t alloc_size; + void *tlb; + + if (!addressing_limit && !swiotlb_force_bounce) + return; + if (swiotlb_force_disable) + return; + + io_tlb_default_mem.force_bounce = + swiotlb_force_bounce || (flags & SWIOTLB_FORCE); + +#ifdef CONFIG_SWIOTLB_DYNAMIC + if (!remap) + io_tlb_default_mem.can_grow = true; + if (flags & SWIOTLB_ANY) + io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); + else + io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT; +#endif + + if (!default_nareas) + swiotlb_adjust_nareas(num_possible_cpus()); - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + nslabs = default_nslabs; + nareas = limit_nareas(default_nareas, nslabs); + while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { + if (nslabs <= IO_TLB_MIN_SLABS) + return; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); } - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + if (default_nslabs != nslabs) { + pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs", + default_nslabs, nslabs); + default_nslabs = nslabs; + } - /* Get IO TLB memory from the low pages */ - vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); + mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); + if (!mem->slots) { + pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", + __func__, alloc_size, PAGE_SIZE); return; + } - if (io_tlb_start) { - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - io_tlb_start = 0; + mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), + nareas), SMP_CACHE_BYTES); + if (!mem->areas) { + pr_warn("%s: Failed to allocate mem->areas.\n", __func__); + return; } - pr_warn("Cannot allocate buffer"); - no_iotlb_memory = true; + + swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); + add_mem_pool(&io_tlb_default_mem, mem); + + if (flags & SWIOTLB_VERBOSE) + swiotlb_print_info(); +} + +void __init swiotlb_init(bool addressing_limit, unsigned int flags) +{ + swiotlb_init_remap(addressing_limit, flags, NULL); } /* @@ -291,149 +427,474 @@ swiotlb_init(int verbose) * initialize the swiotlb later using the slab allocator if needed. * This should be just like above, but with some error catching. */ -int -swiotlb_late_init_with_default_size(size_t default_size) +int swiotlb_init_late(size_t size, gfp_t gfp_mask, + int (*remap)(void *tlb, unsigned long nslabs)) { - unsigned long bytes, req_nslabs = io_tlb_nslabs; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; + unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); + unsigned int nareas; unsigned char *vstart = NULL; - unsigned int order; + unsigned int order, area_order; + bool retried = false; int rc = 0; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } + if (io_tlb_default_mem.nslabs) + return 0; - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); - io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + if (swiotlb_force_disable) + return 0; + + io_tlb_default_mem.force_bounce = swiotlb_force_bounce; + +#ifdef CONFIG_SWIOTLB_DYNAMIC + if (!remap) + io_tlb_default_mem.can_grow = true; + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) + io_tlb_default_mem.phys_limit = zone_dma_limit; + else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) + io_tlb_default_mem.phys_limit = max(DMA_BIT_MASK(32), zone_dma_limit); + else + io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); +#endif + + if (!default_nareas) + swiotlb_adjust_nareas(num_possible_cpus()); + +retry: + order = get_order(nslabs << IO_TLB_SHIFT); + nslabs = SLABS_PER_PAGE << order; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, order); if (vstart) break; order--; + nslabs = SLABS_PER_PAGE << order; + retried = true; } - if (!vstart) { - io_tlb_nslabs = req_nslabs; + if (!vstart) return -ENOMEM; + + if (remap) + rc = remap(vstart, nslabs); + if (rc) { + free_pages((unsigned long)vstart, order); + + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + if (nslabs < IO_TLB_MIN_SLABS) + return rc; + retried = true; + goto retry; } - if (order != get_order(bytes)) { + + if (retried) { pr_warn("only able to allocate %ld MB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; } - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - return rc; + nareas = limit_nareas(default_nareas, nslabs); + area_order = get_order(array_size(sizeof(*mem->areas), nareas)); + mem->areas = (struct io_tlb_area *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); + if (!mem->areas) + goto error_area; + + mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(array_size(sizeof(*mem->slots), nslabs))); + if (!mem->slots) + goto error_slots; + + set_memory_decrypted((unsigned long)vstart, + (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); + swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true, + nareas); + add_mem_pool(&io_tlb_default_mem, mem); + + swiotlb_print_info(); + return 0; + +error_slots: + free_pages((unsigned long)mem->areas, area_order); +error_area: + free_pages((unsigned long)vstart, order); + return -ENOMEM; } -static void swiotlb_cleanup(void) +void __init swiotlb_exit(void) { - io_tlb_end = 0; - io_tlb_start = 0; - io_tlb_nslabs = 0; - max_segment = 0; + struct io_tlb_pool *mem = &io_tlb_default_mem.defpool; + unsigned long tbl_vaddr; + size_t tbl_size, slots_size; + unsigned int area_order; + + if (swiotlb_force_bounce) + return; + + if (!mem->nslabs) + return; + + pr_info("tearing down default memory pool\n"); + tbl_vaddr = (unsigned long)phys_to_virt(mem->start); + tbl_size = PAGE_ALIGN(mem->end - mem->start); + slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs)); + + set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT); + if (mem->late_alloc) { + area_order = get_order(array_size(sizeof(*mem->areas), + mem->nareas)); + free_pages((unsigned long)mem->areas, area_order); + free_pages(tbl_vaddr, get_order(tbl_size)); + free_pages((unsigned long)mem->slots, get_order(slots_size)); + } else { + memblock_free_late(__pa(mem->areas), + array_size(sizeof(*mem->areas), mem->nareas)); + memblock_free_late(mem->start, tbl_size); + memblock_free_late(__pa(mem->slots), slots_size); + } + + memset(mem, 0, sizeof(*mem)); } -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * alloc_dma_pages() - allocate pages to be used for DMA + * @gfp: GFP flags for the allocation. + * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. + * + * Allocate pages from the buddy allocator. If successful, make the allocated + * pages decrypted that they can be used for DMA. + * + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. + */ +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { - unsigned long i, bytes; + unsigned int order = get_order(bytes); + struct page *page; + phys_addr_t paddr; + void *vaddr; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } - bytes = nslabs << IO_TLB_SHIFT; + vaddr = phys_to_virt(paddr); + if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) + goto error; + return page; - io_tlb_nslabs = nslabs; - io_tlb_start = virt_to_phys(tlb); - io_tlb_end = io_tlb_start + bytes; +error: + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); + return NULL; +} - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); +/** + * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer + * @dev: Device for which a memory pool is allocated. + * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. + * @gfp: GFP flags for the allocation. + * + * Return: Allocated pages, or %NULL on allocation failure. + */ +static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, + u64 phys_limit, gfp_t gfp) +{ + struct page *page; /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. + * Allocate from the atomic pools if memory is encrypted and + * the allocation is atomic, because decrypting may block. */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup3; + if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { + void *vaddr; - io_tlb_orig_addr = (phys_addr_t *) - __get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * - sizeof(phys_addr_t))); - if (!io_tlb_orig_addr) - goto cleanup4; + if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) + return NULL; - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, + dma_coherent_ok); } - io_tlb_index = 0; - no_iotlb_memory = false; - swiotlb_print_info(); + gfp &= ~GFP_ZONEMASK; + if (phys_limit <= zone_dma_limit) + gfp |= __GFP_DMA; + else if (phys_limit <= DMA_BIT_MASK(32)) + gfp |= __GFP_DMA32; + + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + phys_limit < DMA_BIT_MASK(64) && + !(gfp & (__GFP_DMA32 | __GFP_DMA))) + gfp |= __GFP_DMA32; + else if (IS_ENABLED(CONFIG_ZONE_DMA) && + !(gfp & __GFP_DMA)) + gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; + else + return NULL; + } - late_alloc = 1; + return page; +} - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); +/** + * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer + * @vaddr: Virtual address of the buffer. + * @bytes: Size of the buffer. + */ +static void swiotlb_free_tlb(void *vaddr, size_t bytes) +{ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(NULL, vaddr, bytes)) + return; - return 0; + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(virt_to_page(vaddr), get_order(bytes)); +} -cleanup4: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; -cleanup3: - swiotlb_cleanup(); - return -ENOMEM; +/** + * swiotlb_alloc_pool() - allocate a new IO TLB memory pool + * @dev: Device for which a memory pool is allocated. + * @minslabs: Minimum number of slabs. + * @nslabs: Desired (maximum) number of slabs. + * @nareas: Number of areas. + * @phys_limit: Maximum DMA buffer physical address. + * @gfp: GFP flags for the allocations. + * + * Allocate and initialize a new IO TLB memory pool. The actual number of + * slabs may be reduced if allocation of @nslabs fails. If even + * @minslabs cannot be allocated, this function fails. + * + * Return: New memory pool, or %NULL on allocation failure. + */ +static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, + unsigned long minslabs, unsigned long nslabs, + unsigned int nareas, u64 phys_limit, gfp_t gfp) +{ + struct io_tlb_pool *pool; + unsigned int slot_order; + struct page *tlb; + size_t pool_size; + size_t tlb_size; + + if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { + nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; + nareas = limit_nareas(nareas, nslabs); + } + + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); + pool = kzalloc(pool_size, gfp); + if (!pool) + goto error; + pool->areas = (void *)pool + sizeof(*pool); + + tlb_size = nslabs << IO_TLB_SHIFT; + while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) { + if (nslabs <= minslabs) + goto error_tlb; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); + tlb_size = nslabs << IO_TLB_SHIFT; + } + + slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); + pool->slots = (struct io_tlb_slot *) + __get_free_pages(gfp, slot_order); + if (!pool->slots) + goto error_slots; + + swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas); + return pool; + +error_slots: + swiotlb_free_tlb(page_address(tlb), tlb_size); +error_tlb: + kfree(pool); +error: + return NULL; } -void __init swiotlb_exit(void) +/** + * swiotlb_dyn_alloc() - dynamic memory pool allocation worker + * @work: Pointer to dyn_alloc in struct io_tlb_mem. + */ +static void swiotlb_dyn_alloc(struct work_struct *work) { - if (!io_tlb_orig_addr) + struct io_tlb_mem *mem = + container_of(work, struct io_tlb_mem, dyn_alloc); + struct io_tlb_pool *pool; + + pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs, + default_nareas, mem->phys_limit, GFP_KERNEL); + if (!pool) { + pr_warn_ratelimited("Failed to allocate new pool"); return; + } - if (late_alloc) { - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - free_pages((unsigned long)phys_to_virt(io_tlb_start), - get_order(io_tlb_nslabs << IO_TLB_SHIFT)); - } else { - memblock_free_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - memblock_free_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - memblock_free_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + add_mem_pool(mem, pool); +} + +/** + * swiotlb_dyn_free() - RCU callback to free a memory pool + * @rcu: RCU head in the corresponding struct io_tlb_pool. + */ +static void swiotlb_dyn_free(struct rcu_head *rcu) +{ + struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu); + size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); + size_t tlb_size = pool->end - pool->start; + + free_pages((unsigned long)pool->slots, get_order(slots_size)); + swiotlb_free_tlb(pool->vaddr, tlb_size); + kfree(pool); +} + +/** + * __swiotlb_find_pool() - find the IO TLB pool for a physical address + * @dev: Device which has mapped the DMA buffer. + * @paddr: Physical address within the DMA buffer. + * + * Find the IO TLB memory pool descriptor which contains the given physical + * address, if any. This function is for use only when the dev is known to + * be using swiotlb. Use swiotlb_find_pool() for the more general case + * when this condition is not met. + * + * Return: Memory pool which contains @paddr, or %NULL if none. + */ +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + if (paddr >= pool->start && paddr < pool->end) + goto out; + } + + list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) { + if (paddr >= pool->start && paddr < pool->end) + goto out; } - swiotlb_cleanup(); + pool = NULL; +out: + rcu_read_unlock(); + return pool; +} + +/** + * swiotlb_del_pool() - remove an IO TLB pool from a device + * @dev: Owning device. + * @pool: Memory pool to be removed. + */ +static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); + list_del_rcu(&pool->node); + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + + call_rcu(&pool->rcu, swiotlb_dyn_free); +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +/** + * swiotlb_dev_init() - initialize swiotlb fields in &struct device + * @dev: Device to be initialized. + */ +void swiotlb_dev_init(struct device *dev) +{ + dev->dma_io_tlb_mem = &io_tlb_default_mem; +#ifdef CONFIG_SWIOTLB_DYNAMIC + INIT_LIST_HEAD(&dev->dma_io_tlb_pools); + spin_lock_init(&dev->dma_io_tlb_lock); + dev->dma_uses_io_tlb = false; +#endif +} + +/** + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. + * @dev: Owning device. + * @align_mask: Allocation alignment mask. + * @addr: DMA address. + * + * Return the minimum offset from the start of an IO TLB allocation which is + * required for a given buffer address and allocation alignment to keep the + * device happy. + * + * First, the address bits covered by min_align_mask must be identical in the + * original address and the bounce buffer address. High bits are preserved by + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra + * padding bytes before the bounce buffer. + * + * Second, @align_mask specifies which bits of the first allocated slot must + * be zero. This may require allocating additional padding slots, and then the + * offset (in bytes) from the first such padding slot is returned. + */ +static unsigned int swiotlb_align_offset(struct device *dev, + unsigned int align_mask, u64 addr) +{ + return addr & dma_get_min_align_mask(dev) & + (align_mask | (IO_TLB_SIZE - 1)); } /* * Bounce: copy the swiotlb buffer from or back to the original dma location */ -static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, + enum dma_data_direction dir, struct io_tlb_pool *mem) { + int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; + phys_addr_t orig_addr = mem->slots[index].orig_addr; + size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = phys_to_virt(tlb_addr); + unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; + int tlb_offset; + + if (orig_addr == INVALID_PHYS_ADDR) + return; + + /* + * It's valid for tlb_offset to be negative. This can happen when the + * "offset" returned by swiotlb_align_offset() is non-zero, and the + * tlb_addr is pointing within the first "offset" bytes of the second + * or subsequent slots of the allocated swiotlb area. While it's not + * valid for tlb_addr to be pointing within the first "offset" bytes + * of the first slot, there's no way to check for such an error since + * this function can't distinguish the first slot from the second and + * subsequent slots. + */ + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(dev, 0, orig_addr); + + orig_addr += tlb_offset; + alloc_size -= tlb_offset; + + if (size > alloc_size) { + dev_WARN_ONCE(dev, 1, + "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", + alloc_size, size); + size = alloc_size; + } if (PageHighMem(pfn_to_page(pfn))) { - /* The buffer does not have a mapping. Map it in and copy */ unsigned int offset = orig_addr & ~PAGE_MASK; - char *buffer; + struct page *page; unsigned int sz = 0; unsigned long flags; @@ -441,12 +902,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn)); + page = pfn_to_page(pfn); if (dir == DMA_TO_DEVICE) - memcpy(vaddr, buffer + offset, sz); + memcpy_from_page(vaddr, page, offset, sz); else - memcpy(buffer + offset, vaddr, sz); - kunmap_atomic(buffer); + memcpy_to_page(page, offset, vaddr, sz); local_irq_restore(flags); size -= sz; @@ -461,154 +921,539 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, } } -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, - enum dma_data_direction dir, unsigned long attrs) +static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) { - dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start); - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - unsigned long tmp_io_tlb_used; + return start + (idx << IO_TLB_SHIFT); +} - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); +/* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. + */ +static inline unsigned long get_max_slots(unsigned long boundary_mask) +{ + return (boundary_mask >> IO_TLB_SHIFT) + 1; +} - if (mem_encrypt_active()) - pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); +static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index) +{ + if (index >= mem->area_nslabs) + return 0; + return index; +} - if (mapping_size > alloc_size) { - dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } +/* + * Track the total used slots with a global atomic value in order to have + * correct information to determine the high water mark. The mem_used() + * function gives imprecise results because there's no locking across + * multiple areas. + */ +#ifdef CONFIG_DEBUG_FS +static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) +{ + unsigned long old_hiwater, new_used; + + new_used = atomic_long_add_return(nslots, &mem->total_used); + old_hiwater = atomic_long_read(&mem->used_hiwater); + do { + if (new_used <= old_hiwater) + break; + } while (!atomic_long_try_cmpxchg(&mem->used_hiwater, + &old_hiwater, new_used)); +} - mask = dma_get_seg_boundary(hwdev); +static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_sub(nslots, &mem->total_used); +} - tbl_dma_addr &= mask; +#else /* !CONFIG_DEBUG_FS */ +static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +#endif /* CONFIG_DEBUG_FS */ - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; +#ifdef CONFIG_SWIOTLB_DYNAMIC +#ifdef CONFIG_DEBUG_FS +static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_add(nslots, &mem->transient_nslabs); +} + +static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_sub(nslots, &mem->transient_nslabs); +} + +#else /* !CONFIG_DEBUG_FS */ +static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +/** + * swiotlb_search_pool_area() - search one memory area in one pool + * @dev: Device which maps the buffer. + * @pool: Memory pool to be searched. + * @area_index: Index of the IO TLB memory area to be searched. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Find a suitable sequence of IO TLB entries for the request and allocate + * a buffer from the given IO TLB memory area. + * This function takes care of locking. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool, + int area_index, phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask) +{ + struct io_tlb_area *area = pool->areas + area_index; + unsigned long boundary_mask = dma_get_seg_boundary(dev); + dma_addr_t tbl_dma_addr = + phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; + unsigned long max_slots = get_max_slots(boundary_mask); + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); + unsigned int nslots = nr_slots(alloc_size), stride; + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); + unsigned int index, slots_checked, count = 0, i; + unsigned long flags; + unsigned int slot_base; + unsigned int slot_index; + + BUG_ON(!nslots); + BUG_ON(area_index >= pool->nareas); /* - * Carefully handle integer overflow which can occur when mask == ~0UL. + * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be + * page-aligned in the absence of any other alignment requirements. + * 'alloc_align_mask' was later introduced to specify the alignment + * explicitly, however this is passed as zero for streaming mappings + * and so we preserve the old behaviour there in case any drivers are + * relying on it. */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) + alloc_align_mask = PAGE_SIZE - 1; /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. */ - nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (alloc_size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. + * For mappings with an alignment requirement don't bother looping to + * unaligned slots once we found an aligned one. */ - spin_lock_irqsave(&io_tlb_lock, flags); + stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); - if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) + spin_lock_irqsave(&area->lock, flags); + if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; + slot_base = area_index * pool->area_nslabs; + index = area->index; - do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } + for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { + phys_addr_t tlb_addr; - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot - * and mark the entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); + slot_index = slot_base + index; + tlb_addr = slot_addr(tbl_dma_addr, slot_index); - goto found; + if ((tlb_addr & alloc_align_mask) || + (orig_addr && (tlb_addr & iotlb_align_mask) != + (orig_addr & iotlb_align_mask))) { + index = wrap_area_index(pool, index + 1); + slots_checked++; + continue; + } + + if (!iommu_is_span_boundary(slot_index, nslots, + nr_slots(tbl_dma_addr), + max_slots)) { + if (pool->slots[slot_index].list >= nslots) + goto found; } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); + index = wrap_area_index(pool, index + stride); + slots_checked += stride; + } not_found: - tmp_io_tlb_used = io_tlb_used; + spin_unlock_irqrestore(&area->lock, flags); + return -1; - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, io_tlb_nslabs, tmp_io_tlb_used); - return (phys_addr_t)DMA_MAPPING_ERROR; found: - io_tlb_used += nslots; - spin_unlock_irqrestore(&io_tlb_lock, flags); + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot onwards + * and set the list of free entries to '0' indicating unavailable. + */ + for (i = slot_index; i < slot_index + nslots; i++) { + pool->slots[i].list = 0; + pool->slots[i].alloc_size = alloc_size - (offset + + ((i - slot_index) << IO_TLB_SHIFT)); + } + for (i = slot_index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && + pool->slots[i].list; i--) + pool->slots[i].list = ++count; /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. + * Update the indices to avoid searching in the next round. */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); + area->index = wrap_area_index(pool, index + nslots); + area->used += nslots; + spin_unlock_irqrestore(&area->lock, flags); - return tlb_addr; + inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots); + return slot_index; } -/* - * tlb_addr is the physical address of the bounce buffer to unmap. +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * swiotlb_search_area() - search one memory area in all pools + * @dev: Device which maps the buffer. + * @start_cpu: Start CPU number. + * @cpu_offset: Offset from @start_cpu. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. + * + * Search one memory area in all pools for a sequence of slots that match the + * allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t mapping_size, size_t alloc_size, - enum dma_data_direction dir, unsigned long attrs) +static int swiotlb_search_area(struct device *dev, int start_cpu, + int cpu_offset, phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + int area_index; + int index = -1; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + if (cpu_offset >= pool->nareas) + continue; + area_index = (start_cpu + cpu_offset) & (pool->nareas - 1); + index = swiotlb_search_pool_area(dev, pool, area_index, + orig_addr, alloc_size, + alloc_align_mask); + if (index >= 0) { + *retpool = pool; + break; + } + } + rcu_read_unlock(); + return index; +} + +/** + * swiotlb_find_slots() - search for slots in the whole swiotlb + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. + * + * Search through the whole software IO TLB to find a sequence of slots that + * match the allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask, + struct io_tlb_pool **retpool) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + unsigned long nslabs; unsigned long flags; - int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; + u64 phys_limit; + int cpu, i; + int index; + + if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) + return -1; + + cpu = raw_smp_processor_id(); + for (i = 0; i < default_nareas; ++i) { + index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, + alloc_align_mask, &pool); + if (index >= 0) + goto found; + } + + if (!mem->can_grow) + return -1; + + schedule_work(&mem->dyn_alloc); + + nslabs = nr_slots(alloc_size); + phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); + pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, + GFP_NOWAIT | __GFP_NOWARN); + if (!pool) + return -1; + + index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, + alloc_size, alloc_align_mask); + if (index < 0) { + swiotlb_dyn_free(&pool->rcu); + return -1; + } + + pool->transient = true; + spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); + list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); + spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + inc_transient_used(mem, pool->nslabs); + +found: + WRITE_ONCE(dev->dma_uses_io_tlb, true); /* - * First, sync the memory before unmapping the entry + * The general barrier orders reads and writes against a presumed store + * of the SWIOTLB buffer address by a device driver (to a driver private + * data structure). It serves two purposes. + * + * First, the store to dev->dma_uses_io_tlb must be ordered before the + * presumed store. This guarantees that the returned buffer address + * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. + * + * Second, the load from mem->pools must be ordered before the same + * presumed store. This guarantees that the returned buffer address + * cannot be observed by another CPU before an update of the RCU list + * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy + * atomicity). + * + * See also the comment in swiotlb_find_pool(). */ - if (orig_addr != INVALID_PHYS_ADDR && - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE); + smp_mb(); + + *retpool = pool; + return index; +} + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask, + struct io_tlb_pool **retpool) +{ + struct io_tlb_pool *pool; + int start, i; + int index; + + *retpool = pool = &dev->dma_io_tlb_mem->defpool; + i = start = raw_smp_processor_id() & (pool->nareas - 1); + do { + index = swiotlb_search_pool_area(dev, pool, i, orig_addr, + alloc_size, alloc_align_mask); + if (index >= 0) + return index; + if (++i >= pool->nareas) + i = 0; + } while (i != start); + return -1; +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +#ifdef CONFIG_DEBUG_FS + +/** + * mem_used() - get number of used slots in an allocator + * @mem: Software IO TLB allocator. + * + * The result is accurate in this version of the function, because an atomic + * counter is available if CONFIG_DEBUG_FS is set. + * + * Return: Number of used slots. + */ +static unsigned long mem_used(struct io_tlb_mem *mem) +{ + return atomic_long_read(&mem->total_used); +} + +#else /* !CONFIG_DEBUG_FS */ + +/** + * mem_pool_used() - get number of used slots in a memory pool + * @pool: Software IO TLB memory pool. + * + * The result is not accurate, see mem_used(). + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_pool_used(struct io_tlb_pool *pool) +{ + int i; + unsigned long used = 0; + + for (i = 0; i < pool->nareas; i++) + used += pool->areas[i].used; + return used; +} + +/** + * mem_used() - get number of used slots in an allocator + * @mem: Software IO TLB allocator. + * + * The result is not accurate, because there is no locking of individual + * areas. + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_used(struct io_tlb_mem *mem) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + struct io_tlb_pool *pool; + unsigned long used = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) + used += mem_pool_used(pool); + rcu_read_unlock(); + + return used; +#else + return mem_pool_used(&mem->defpool); +#endif +} + +#endif /* CONFIG_DEBUG_FS */ + +/** + * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) physical IO buffer address + * @mapping_size: Requested size of the actual bounce buffer, excluding + * any pre- or post-padding for alignment + * @alloc_align_mask: Required start and end alignment of the allocated buffer + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Find and allocate a suitable sequence of IO TLB slots for the request. + * The allocated space starts at an alignment specified by alloc_align_mask, + * and the size of the allocated space is rounded up so that the total amount + * of allocated space is a multiple of (alloc_align_mask + 1). If + * alloc_align_mask is zero, the allocated space may be at any alignment and + * the size is not rounded up. + * + * The returned address is within the allocated space and matches the bits + * of orig_addr that are specified in the DMA min_align_mask for the device. As + * such, this returned address may be offset from the beginning of the allocated + * space. The bounce buffer space starting at the returned address for + * mapping_size bytes is initialized to the contents of the original IO buffer + * area. Any pre-padding (due to an offset) and any post-padding (due to + * rounding-up the size) is not initialized. + */ +phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, + size_t mapping_size, unsigned int alloc_align_mask, + enum dma_data_direction dir, unsigned long attrs) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + unsigned int offset; + struct io_tlb_pool *pool; + unsigned int i; + size_t size; + int index; + phys_addr_t tlb_addr; + unsigned short pad_slots; + + if (!mem || !mem->nslabs) { + dev_warn_ratelimited(dev, + "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); + + /* + * The default swiotlb memory pool is allocated with PAGE_SIZE + * alignment. If a mapping is requested with larger alignment, + * the mapping may be unable to use the initial slot(s) in all + * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request + * of or near the maximum mapping size would always fail. + */ + dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK, + "Alloc alignment may prevent fulfilling requests with max mapping_size\n"); + + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); + size = ALIGN(mapping_size + offset, alloc_align_mask + 1); + index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool); + if (index == -1) { + if (!(attrs & DMA_ATTR_NO_WARN)) + dev_warn_ratelimited(dev, + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", + size, mem->nslabs, mem_used(mem)); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + /* + * If dma_skip_sync was set, reset it on first SWIOTLB buffer + * mapping to always sync SWIOTLB buffers. + */ + dma_reset_need_sync(dev); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + pad_slots = offset >> IO_TLB_SHIFT; + offset &= (IO_TLB_SIZE - 1); + index += pad_slots; + pool->slots[index].pad_slots = pad_slots; + for (i = 0; i < (nr_slots(size) - pad_slots); i++) + pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); + tlb_addr = slot_addr(pool->start, index) + offset; + /* + * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy + * the original buffer to the TLB buffer before initiating DMA in order + * to preserve the original's data if the device does a partial write, + * i.e. if the device doesn't overwrite the entire buffer. Preserving + * the original data, even if it's garbage, is necessary to match + * hardware behavior. Use of swiotlb is supposed to be transparent, + * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. + */ + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); + return tlb_addr; +} + +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *mem) +{ + unsigned long flags; + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); + int index, nslots, aindex; + struct io_tlb_area *area; + int count, i; + + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; + index -= mem->slots[index].pad_slots; + nslots = nr_slots(mem->slots[index].alloc_size + offset); + aindex = index / mem->area_nslabs; + area = &mem->areas[aindex]; /* * Return the buffer to the free list by setting the corresponding @@ -616,59 +1461,112 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * While returning the entries to the free list, we merge the entries * with slots below and above the pool being returned. */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; + BUG_ON(aindex >= mem->nareas); - io_tlb_used -= nslots; + spin_lock_irqsave(&area->lock, flags); + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) + count = mem->slots[index + nslots].list; + else + count = 0; + + /* + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + mem->slots[i].list = ++count; + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } - spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Step 2: merge the returned slots with the preceding slots, if + * available (non zero) + */ + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list; + i--) + mem->slots[i].list = ++count; + area->used -= nslots; + spin_unlock_irqrestore(&area->lock, flags); + + dec_used(dev->dma_io_tlb_mem, nslots); } -void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) +#ifdef CONFIG_SWIOTLB_DYNAMIC + +/** + * swiotlb_del_transient() - delete a transient memory pool + * @dev: Device which mapped the buffer. + * @tlb_addr: Physical address within a bounce buffer. + * @pool: Pointer to the transient memory pool to be checked and deleted. + * + * Check whether the address belongs to a transient SWIOTLB memory pool. + * If yes, then delete the pool. + * + * Return: %true if @tlb_addr belonged to a transient pool that was released. + */ +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *pool) { - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; + if (!pool->transient) + return false; - if (orig_addr == INVALID_PHYS_ADDR) + dec_used(dev->dma_io_tlb_mem, pool->nslabs); + swiotlb_del_pool(dev, pool); + dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs); + return true; +} + +#else /* !CONFIG_SWIOTLB_DYNAMIC */ + +static inline bool swiotlb_del_transient(struct device *dev, + phys_addr_t tlb_addr, struct io_tlb_pool *pool) +{ + return false; +} + +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +/* + * tlb_addr is the physical address of the bounce buffer to unmap. + */ +void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool) +{ + /* + * First, sync the memory before unmapping the entry + */ + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(dev, tlb_addr, mapping_size, + DMA_FROM_DEVICE, pool); + + if (swiotlb_del_transient(dev, tlb_addr, pool)) return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); + swiotlb_release_slots(dev, tlb_addr, pool); +} - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_FROM_DEVICE); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) +{ + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); + else + BUG_ON(dir != DMA_FROM_DEVICE); +} + +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) +{ + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); + else + BUG_ON(dir != DMA_TO_DEVICE); } /* @@ -681,19 +1579,18 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, phys_addr_t swiotlb_addr; dma_addr_t dma_addr; - trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, - swiotlb_force); + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); - swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir, - attrs); + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); @@ -707,30 +1604,280 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, size_t swiotlb_max_mapping_size(struct device *dev) { - return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; -} + int min_align_mask = dma_get_min_align_mask(dev); + int min_align = 0; -bool is_swiotlb_active(void) -{ /* - * When SWIOTLB is initialized, even if io_tlb_start points to physical - * address zero, io_tlb_end surely doesn't. + * swiotlb_find_slots() skips slots according to + * min align mask. This affects max mapping size. + * Take it into acount here. */ - return io_tlb_end != 0; + if (min_align_mask) + min_align = roundup(min_align_mask, IO_TLB_SIZE); + + return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align; +} + +/** + * is_swiotlb_allocated() - check if the default software IO TLB is initialized + */ +bool is_swiotlb_allocated(void) +{ + return io_tlb_default_mem.nslabs; +} + +bool is_swiotlb_active(struct device *dev) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + + return mem && mem->nslabs; +} + +/** + * default_swiotlb_base() - get the base address of the default SWIOTLB + * + * Get the lowest physical address used by the default software IO TLB pool. + */ +phys_addr_t default_swiotlb_base(void) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + io_tlb_default_mem.can_grow = false; +#endif + return io_tlb_default_mem.defpool.start; +} + +/** + * default_swiotlb_limit() - get the address limit of the default SWIOTLB + * + * Get the highest physical address used by the default software IO TLB pool. + */ +phys_addr_t default_swiotlb_limit(void) +{ +#ifdef CONFIG_SWIOTLB_DYNAMIC + return io_tlb_default_mem.phys_limit; +#else + return io_tlb_default_mem.defpool.end - 1; +#endif } #ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_SWIOTLB_DYNAMIC +static unsigned long mem_transient_used(struct io_tlb_mem *mem) +{ + return atomic_long_read(&mem->transient_nslabs); +} + +static int io_tlb_transient_used_get(void *data, u64 *val) +{ + struct io_tlb_mem *mem = data; + + *val = mem_transient_used(mem); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get, + NULL, "%llu\n"); +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + +static int io_tlb_used_get(void *data, u64 *val) +{ + struct io_tlb_mem *mem = data; + + *val = mem_used(mem); + return 0; +} -static int __init swiotlb_create_debugfs(void) +static int io_tlb_hiwater_get(void *data, u64 *val) { - struct dentry *root; + struct io_tlb_mem *mem = data; - root = debugfs_create_dir("swiotlb", NULL); - debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs); - debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used); + *val = atomic_long_read(&mem->used_hiwater); return 0; } -late_initcall(swiotlb_create_debugfs); +static int io_tlb_hiwater_set(void *data, u64 val) +{ + struct io_tlb_mem *mem = data; + + /* Only allow setting to zero */ + if (val != 0) + return -EINVAL; + atomic_long_set(&mem->used_hiwater, val); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, + io_tlb_hiwater_set, "%llu\n"); + +static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, + const char *dirname) +{ + mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); + if (!mem->nslabs) + return; + + debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); + debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem, + &fops_io_tlb_used); + debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, + &fops_io_tlb_hiwater); +#ifdef CONFIG_SWIOTLB_DYNAMIC + debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, + mem, &fops_io_tlb_transient_used); #endif +} + +static int __init swiotlb_create_default_debugfs(void) +{ + swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb"); + return 0; +} + +late_initcall(swiotlb_create_default_debugfs); + +#else /* !CONFIG_DEBUG_FS */ + +static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, + const char *dirname) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +#ifdef CONFIG_DMA_RESTRICTED_POOL + +struct page *swiotlb_alloc(struct device *dev, size_t size) +{ + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + phys_addr_t tlb_addr; + unsigned int align; + int index; + + if (!mem) + return NULL; + + align = (1 << (get_order(size) + PAGE_SHIFT)) - 1; + index = swiotlb_find_slots(dev, 0, size, align, &pool); + if (index == -1) + return NULL; + + tlb_addr = slot_addr(pool->start, index); + if (unlikely(!PAGE_ALIGNED(tlb_addr))) { + dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", + &tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); + return NULL; + } + + return pfn_to_page(PFN_DOWN(tlb_addr)); +} + +bool swiotlb_free(struct device *dev, struct page *page, size_t size) +{ + phys_addr_t tlb_addr = page_to_phys(page); + struct io_tlb_pool *pool; + + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool) + return false; + + swiotlb_release_slots(dev, tlb_addr, pool); + + return true; +} + +static int rmem_swiotlb_device_init(struct reserved_mem *rmem, + struct device *dev) +{ + struct io_tlb_mem *mem = rmem->priv; + unsigned long nslabs = rmem->size >> IO_TLB_SHIFT; + + /* Set Per-device io tlb area to one */ + unsigned int nareas = 1; + + if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) { + dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping."); + return -EINVAL; + } + + /* + * Since multiple devices can share the same pool, the private data, + * io_tlb_mem struct, will be initialized by the first device attached + * to it. + */ + if (!mem) { + struct io_tlb_pool *pool; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return -ENOMEM; + pool = &mem->defpool; + + pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL); + if (!pool->slots) { + kfree(mem); + return -ENOMEM; + } + + pool->areas = kcalloc(nareas, sizeof(*pool->areas), + GFP_KERNEL); + if (!pool->areas) { + kfree(pool->slots); + kfree(mem); + return -ENOMEM; + } + + set_memory_decrypted((unsigned long)phys_to_virt(rmem->base), + rmem->size >> PAGE_SHIFT); + swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs, + false, nareas); + mem->force_bounce = true; + mem->for_alloc = true; +#ifdef CONFIG_SWIOTLB_DYNAMIC + spin_lock_init(&mem->lock); + INIT_LIST_HEAD_RCU(&mem->pools); +#endif + add_mem_pool(mem, pool); + + rmem->priv = mem; + + swiotlb_create_debugfs_files(mem, rmem->name); + } + + dev->dma_io_tlb_mem = mem; + + return 0; +} + +static void rmem_swiotlb_device_release(struct reserved_mem *rmem, + struct device *dev) +{ + dev->dma_io_tlb_mem = &io_tlb_default_mem; +} + +static const struct reserved_mem_ops rmem_swiotlb_ops = { + .device_init = rmem_swiotlb_device_init, + .device_release = rmem_swiotlb_device_release, +}; + +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (of_get_flat_dt_prop(node, "reusable", NULL) || + of_get_flat_dt_prop(node, "linux,cma-default", NULL) || + of_get_flat_dt_prop(node, "linux,dma-default", NULL) || + of_get_flat_dt_prop(node, "no-map", NULL)) + return -EINVAL; + + rmem->ops = &rmem_swiotlb_ops; + pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + return 0; +} + +RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup); +#endif /* CONFIG_DMA_RESTRICTED_POOL */ |