diff options
Diffstat (limited to 'kernel/dma')
| -rw-r--r-- | kernel/dma/Kconfig | 38 | ||||
| -rw-r--r-- | kernel/dma/Makefile | 4 | ||||
| -rw-r--r-- | kernel/dma/coherent.c | 30 | ||||
| -rw-r--r-- | kernel/dma/contiguous.c | 32 | ||||
| -rw-r--r-- | kernel/dma/debug.c | 260 | ||||
| -rw-r--r-- | kernel/dma/debug.h | 57 | ||||
| -rw-r--r-- | kernel/dma/direct.c | 207 | ||||
| -rw-r--r-- | kernel/dma/direct.h | 67 | ||||
| -rw-r--r-- | kernel/dma/dummy.c | 28 | ||||
| -rw-r--r-- | kernel/dma/map_benchmark.c | 43 | ||||
| -rw-r--r-- | kernel/dma/mapping.c | 371 | ||||
| -rw-r--r-- | kernel/dma/ops_helpers.c | 18 | ||||
| -rw-r--r-- | kernel/dma/pool.c | 18 | ||||
| -rw-r--r-- | kernel/dma/remap.c | 8 | ||||
| -rw-r--r-- | kernel/dma/swiotlb.c | 486 |
15 files changed, 1057 insertions, 610 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 4c1e9a3c0ab6..31cfdb6b4bc3 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -8,8 +8,7 @@ config HAS_DMA depends on !NO_DMA default y -config DMA_OPS - depends on HAS_DMA +config DMA_OPS_HELPERS bool # @@ -107,6 +106,11 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC bool depends on SWIOTLB +config DMA_NEED_SYNC + def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \ + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || \ + ARCH_HAS_DMA_OPS || SWIOTLB + config DMA_RESTRICTED_POOL bool "DMA Restricted Pool" depends on OF && OF_RESERVED_MEM && SWIOTLB @@ -135,6 +139,8 @@ config DMA_COHERENT_POOL config DMA_GLOBAL_POOL select DMA_DECLARE_COHERENT + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP bool config DMA_DIRECT_REMAP @@ -142,6 +148,15 @@ config DMA_DIRECT_REMAP select DMA_COHERENT_POOL select DMA_NONCOHERENT_MMAP +# +# Fallback to arch code for DMA allocations. This should eventually go away. +# +config ARCH_HAS_DMA_ALLOC + depends on !ARCH_HAS_DMA_SET_UNCACHED + depends on !DMA_DIRECT_REMAP + depends on !DMA_GLOBAL_POOL + bool + config DMA_CMA bool "DMA Contiguous Memory Allocator" depends on HAVE_DMA_CONTIGUOUS && CMA @@ -160,7 +175,7 @@ if DMA_CMA config DMA_NUMA_CMA bool "Enable separate DMA Contiguous Memory Area for NUMA Node" - default NUMA + depends on NUMA help Enable this option to get numa CMA areas so that NUMA devices can get local memory by DMA coherent APIs. @@ -245,23 +260,6 @@ config DMA_API_DEBUG If unsure, say N. -config DMA_API_DEBUG_SG - bool "Debug DMA scatter-gather usage" - default y - depends on DMA_API_DEBUG - help - Perform extra checking that callers of dma_map_sg() have respected the - appropriate segment length/boundary limits for the given device when - preparing DMA scatterlists. - - This is particularly likely to have been overlooked in cases where the - dma_map_sg() API is used for general bulk mapping of pages rather than - preparing literal scatter-gather descriptors, where there is a risk of - unexpected behaviour from DMA API implementations if the scatterlist - is technically out-of-spec. - - If unsure, say N. - config DMA_MAP_BENCHMARK bool "Enable benchmarking of streaming DMA mapping" depends on DEBUG_FS diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 21926e46ef4f..6977033444a3 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o -obj-$(CONFIG_DMA_OPS) += ops_helpers.o -obj-$(CONFIG_DMA_OPS) += dummy.o +obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o +obj-$(CONFIG_ARCH_HAS_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index c21abc77c53e..77c8d9487a9a 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -132,8 +132,10 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, void dma_release_coherent_memory(struct device *dev) { - if (dev) + if (dev) { _dma_release_coherent_memory(dev->dma_mem); + dev->dma_mem = NULL; + } } static void *__dma_alloc_from_coherent(struct device *dev, @@ -328,21 +330,28 @@ int dma_init_global_coherent(phys_addr_t phys_addr, size_t size) #include <linux/of_reserved_mem.h> #ifdef CONFIG_DMA_GLOBAL_POOL -static struct reserved_mem *dma_reserved_default_memory __initdata; +static phys_addr_t dma_reserved_default_memory_base __initdata; +static phys_addr_t dma_reserved_default_memory_size __initdata; #endif static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - if (!rmem->priv) { - struct dma_coherent_mem *mem; + struct dma_coherent_mem *mem = rmem->priv; + if (!mem) { mem = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size, true); if (IS_ERR(mem)) return PTR_ERR(mem); rmem->priv = mem; } - dma_assign_coherent_memory(dev, rmem->priv); + + /* Warn if the device potentially can't use the reserved memory */ + if (mem->device_base + rmem->size - 1 > + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit)) + dev_warn(dev, "reserved memory is beyond device's set DMA address range\n"); + + dma_assign_coherent_memory(dev, mem); return 0; } @@ -374,9 +383,10 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) #ifdef CONFIG_DMA_GLOBAL_POOL if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) { - WARN(dma_reserved_default_memory, + WARN(dma_reserved_default_memory_size, "Reserved memory: region for default DMA coherent area is redefined\n"); - dma_reserved_default_memory = rmem; + dma_reserved_default_memory_base = rmem->base; + dma_reserved_default_memory_size = rmem->size; } #endif @@ -389,10 +399,10 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem) #ifdef CONFIG_DMA_GLOBAL_POOL static int __init dma_init_reserved_memory(void) { - if (!dma_reserved_default_memory) + if (!dma_reserved_default_memory_size) return -ENOMEM; - return dma_init_global_coherent(dma_reserved_default_memory->base, - dma_reserved_default_memory->size); + return dma_init_global_coherent(dma_reserved_default_memory_base, + dma_reserved_default_memory_size); } core_initcall(dma_init_reserved_memory); #endif /* CONFIG_DMA_GLOBAL_POOL */ diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 88c595e49e34..d8fd6f779f79 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -37,17 +37,12 @@ #define pr_fmt(fmt) "cma: " fmt -#ifdef CONFIG_CMA_DEBUG -#ifndef DEBUG -# define DEBUG -#endif -#endif - #include <asm/page.h> #include <linux/memblock.h> #include <linux/err.h> #include <linux/sizes.h> +#include <linux/dma-buf/heaps/cma.h> #include <linux/dma-map-ops.h> #include <linux/cma.h> #include <linux/nospec.h> @@ -70,8 +65,7 @@ struct cma *dma_contiguous_default_area; * Users, who want to set the size of global CMA area for their system * should use cma= kernel parameter. */ -static const phys_addr_t size_bytes __initconst = - (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; +#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M) static phys_addr_t size_cmdline __initdata = -1; static phys_addr_t base_cmdline __initdata; static phys_addr_t limit_cmdline __initdata; @@ -229,7 +223,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit) if (size_cmdline != -1) { selected_size = size_cmdline; selected_base = base_cmdline; - selected_limit = min_not_zero(limit_cmdline, limit); + + /* Hornor the user setup dma address limit */ + selected_limit = limit_cmdline ?: limit; + if (base_cmdline + size_cmdline == limit_cmdline) fixed = true; } else { @@ -245,6 +242,8 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } if (selected_size && !dma_contiguous_default_area) { + int ret; + pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); @@ -252,6 +251,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit) selected_limit, &dma_contiguous_default_area, fixed); + + ret = dma_heap_cma_register_heap(dma_contiguous_default_area); + if (ret) + pr_warn("Couldn't register default CMA heap."); } } @@ -473,11 +476,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) return -EBUSY; } - if (memblock_is_region_reserved(rmem->base, rmem->size)) { - pr_info("Reserved memory: overlap with other memblock reserved region\n"); - return -EBUSY; - } - if (!of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; @@ -492,8 +490,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) pr_err("Reserved memory: unable to setup CMA region\n"); return err; } - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(rmem->base, rmem->size); if (default_cma) dma_contiguous_default_area = cma; @@ -504,6 +500,10 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n", &rmem->base, (unsigned long)rmem->size / SZ_1M); + err = dma_heap_cma_register_heap(cma); + if (err) + pr_warn("Couldn't register CMA heap."); + return 0; } RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index f190651bcadd..138ede653de4 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -23,6 +23,7 @@ #include <linux/ctype.h> #include <linux/list.h> #include <linux/slab.h> +#include <linux/swiotlb.h> #include <asm/sections.h> #include "debug.h" @@ -38,7 +39,8 @@ enum { dma_debug_single, dma_debug_sg, dma_debug_coherent, - dma_debug_resource, + dma_debug_noncoherent, + dma_debug_phy, }; enum map_err_types { @@ -59,10 +61,10 @@ enum map_err_types { * @direction: enum dma_data_direction * @sg_call_ents: 'nents' from dma_map_sg * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn + * @paddr: physical start address of the mapping * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected + * @stack_len: number of backtrace entries in @stack_entries + * @stack_entries: stack of backtrace history */ struct dma_debug_entry { struct list_head list; @@ -73,8 +75,7 @@ struct dma_debug_entry { int direction; int sg_call_ents; int sg_mapped_ents; - unsigned long pfn; - size_t offset; + phys_addr_t paddr; enum map_err_types map_err_type; #ifdef CONFIG_STACKTRACE unsigned int stack_len; @@ -139,9 +140,10 @@ static const char *const maperr2str[] = { static const char *type2name[] = { [dma_debug_single] = "single", - [dma_debug_sg] = "scather-gather", + [dma_debug_sg] = "scatter-gather", [dma_debug_coherent] = "coherent", - [dma_debug_resource] = "resource", + [dma_debug_noncoherent] = "noncoherent", + [dma_debug_phy] = "phy", }; static const char *dir2name[] = { @@ -388,14 +390,6 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - /* * For each mapping (initial cacheline in the case of * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a @@ -415,8 +409,11 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. + * + * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end + * up right back in the DMA debugging code, leading to a deadlock. */ -static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) @@ -424,8 +421,8 @@ static DEFINE_SPINLOCK(radix_lock); static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) { - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); + return ((entry->paddr >> PAGE_SHIFT) << CACHELINE_PER_PAGE_SHIFT) + + (offset_in_page(entry->paddr) >> L1_CACHE_SHIFT); } static int active_cacheline_read_overlap(phys_addr_t cln) @@ -534,11 +531,11 @@ void debug_dma_dump_mappings(struct device *dev) if (!dev || dev == entry->dev) { cln = to_cacheline_number(entry); dev_info(entry->dev, - "%s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n", + "%s idx %d P=%pa D=%llx L=%llx cln=%pa %s %s\n", type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - &cln, dir2name[entry->direction], + &entry->paddr, entry->dev_addr, + entry->size, &cln, + dir2name[entry->direction], maperr2str[entry->map_err_type]); } } @@ -565,13 +562,13 @@ static int dump_show(struct seq_file *seq, void *v) list_for_each_entry(entry, &bucket->list, list) { cln = to_cacheline_number(entry); seq_printf(seq, - "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n", + "%s %s %s idx %d P=%pa D=%llx L=%llx cln=%pa %s %s\n", dev_driver_string(entry->dev), dev_name(entry->dev), type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - &cln, dir2name[entry->direction], + &entry->paddr, entry->dev_addr, + entry->size, &cln, + dir2name[entry->direction], maperr2str[entry->map_err_type]); } spin_unlock_irqrestore(&bucket->lock, flags); @@ -598,7 +595,9 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) if (rc == -ENOMEM) { pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; - } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && + is_swiotlb_active(entry->dev))) { err_printk(entry->dev, entry, "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } @@ -637,15 +636,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void) return entry; } -static void __dma_entry_alloc_check_leak(void) +/* + * This should be called outside of free_entries_lock scope to avoid potential + * deadlocks with serial consoles that use DMA. + */ +static void __dma_entry_alloc_check_leak(u32 nr_entries) { - u32 tmp = nr_total_entries % nr_prealloc_entries; + u32 tmp = nr_entries % nr_prealloc_entries; /* Shout each time we tick over some multiple of the initial pool */ if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) { pr_info("dma_debug_entry pool grown to %u (%u00%%)\n", - nr_total_entries, - (nr_total_entries / nr_prealloc_entries)); + nr_entries, + (nr_entries / nr_prealloc_entries)); } } @@ -656,8 +659,10 @@ static void __dma_entry_alloc_check_leak(void) */ static struct dma_debug_entry *dma_entry_alloc(void) { + bool alloc_check_leak = false; struct dma_debug_entry *entry; unsigned long flags; + u32 nr_entries; spin_lock_irqsave(&free_entries_lock, flags); if (num_free_entries == 0) { @@ -667,13 +672,17 @@ static struct dma_debug_entry *dma_entry_alloc(void) pr_err("debugging out of memory - disabling\n"); return NULL; } - __dma_entry_alloc_check_leak(); + alloc_check_leak = true; + nr_entries = nr_total_entries; } entry = __dma_entry_alloc(); spin_unlock_irqrestore(&free_entries_lock, flags); + if (alloc_check_leak) + __dma_entry_alloc_check_leak(nr_entries); + #ifdef CONFIG_STACKTRACE entry->stack_len = stack_trace_save(entry->stack_entries, ARRAY_SIZE(entry->stack_entries), @@ -866,7 +875,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti return 0; } -void dma_debug_add_bus(struct bus_type *bus) +void dma_debug_add_bus(const struct bus_type *bus) { struct notifier_block *nb; @@ -989,16 +998,17 @@ static void check_unmap(struct dma_debug_entry *ref) "[mapped as %s] [unmapped as %s]\n", ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { + } else if ((entry->type == dma_debug_coherent || + entry->type == dma_debug_noncoherent) && + ref->paddr != entry->paddr) { err_printk(ref->dev, entry, "device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", + "[cpu alloc address=0x%pa] " + "[cpu free address=0x%pa]", ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); + &entry->paddr, + &ref->paddr); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -1038,22 +1048,25 @@ static void check_unmap(struct dma_debug_entry *ref) } hash_bucket_del(entry); - dma_entry_free(entry); - put_hash_bucket(bucket, flags); + + /* + * Free the entry outside of bucket_lock to avoid ABBA deadlocks + * between that and radix_lock. + */ + dma_entry_free(entry); } -static void check_for_stack(struct device *dev, - struct page *page, size_t offset) +static void check_for_stack(struct device *dev, phys_addr_t phys) { void *addr; struct vm_struct *stack_vm_area = task_stack_vm_area(current); if (!stack_vm_area) { /* Stack is direct-mapped. */ - if (PageHighMem(page)) + if (PhysHighMem(phys)) return; - addr = page_address(page) + offset; + addr = phys_to_virt(phys); if (object_is_on_stack(addr)) err_printk(dev, NULL, "device driver maps memory from stack [addr=%p]\n", addr); } else { @@ -1061,10 +1074,12 @@ static void check_for_stack(struct device *dev, int i; for (i = 0; i < stack_vm_area->nr_pages; i++) { - if (page != stack_vm_area->pages[i]) + if (__phys_to_pfn(phys) != + page_to_pfn(stack_vm_area->pages[i])) continue; - addr = (u8 *)current->stack + i * PAGE_SIZE + offset; + addr = (u8 *)current->stack + i * PAGE_SIZE + + (phys % PAGE_SIZE); err_printk(dev, NULL, "device driver maps memory from stack [probable addr=%p]\n", addr); break; } @@ -1155,7 +1170,6 @@ out: static void check_sg_segment(struct device *dev, struct scatterlist *sg) { -#ifdef CONFIG_DMA_API_DEBUG_SG unsigned int max_seg = dma_get_max_seg_size(dev); u64 start, end, boundary = dma_get_seg_boundary(dev); @@ -1176,7 +1190,6 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg) if ((start ^ end) & ~boundary) err_printk(dev, NULL, "mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", start, end, boundary); -#endif } void debug_dma_map_single(struct device *dev, const void *addr, @@ -1195,9 +1208,8 @@ void debug_dma_map_single(struct device *dev, const void *addr, } EXPORT_SYMBOL(debug_dma_map_single); -void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr, - unsigned long attrs) +void debug_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + int direction, dma_addr_t dma_addr, unsigned long attrs) { struct dma_debug_entry *entry; @@ -1212,20 +1224,18 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, return; entry->dev = dev; - entry->type = dma_debug_single; - entry->pfn = page_to_pfn(page); - entry->offset = offset; + entry->type = dma_debug_phy; + entry->paddr = phys; entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; entry->map_err_type = MAP_ERR_NOT_CHECKED; - check_for_stack(dev, page, offset); - - if (!PageHighMem(page)) { - void *addr = page_address(page) + offset; + if (!(attrs & DMA_ATTR_MMIO)) { + check_for_stack(dev, phys); - check_for_illegal_area(dev, addr, size); + if (!PhysHighMem(phys)) + check_for_illegal_area(dev, phys_to_virt(phys), size); } add_dma_entry(entry, attrs); @@ -1269,11 +1279,11 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) } EXPORT_SYMBOL(debug_dma_mapping_error); -void debug_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, +void debug_dma_unmap_phys(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) { struct dma_debug_entry ref = { - .type = dma_debug_single, + .type = dma_debug_phy, .dev = dev, .dev_addr = dma_addr, .size = size, @@ -1297,7 +1307,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, return; for_each_sg(sg, s, nents, i) { - check_for_stack(dev, sg_page(s), s->offset); + check_for_stack(dev, sg_phys(s)); if (!PageHighMem(sg_page(s))) check_for_illegal_area(dev, sg_virt(s), s->length); } @@ -1309,8 +1319,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset; + entry->paddr = sg_phys(s); entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1356,8 +1365,7 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(s), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1374,6 +1382,18 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, } } +static phys_addr_t virt_to_paddr(void *virt) +{ + struct page *page; + + if (is_vmalloc_addr(virt)) + page = vmalloc_to_page(virt); + else + page = virt_to_page(virt); + + return page_to_phys(page) + offset_in_page(virt); +} + void debug_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t dma_addr, void *virt, unsigned long attrs) @@ -1396,16 +1416,11 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->offset = offset_in_page(virt); + entry->paddr = virt_to_paddr(virt); entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; - if (is_vmalloc_addr(virt)) - entry->pfn = vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - add_dma_entry(entry, attrs); } @@ -1415,7 +1430,6 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .offset = offset_in_page(virt), .dev_addr = dma_addr, .size = size, .direction = DMA_BIDIRECTIONAL, @@ -1425,52 +1439,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} - -void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr, - unsigned long attrs) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_resource; - entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - add_dma_entry(entry, attrs); -} - -void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - struct dma_debug_entry ref = { - .type = dma_debug_resource, - .dev = dev, - .dev_addr = dma_addr, - .size = size, - .direction = direction, - }; + ref.paddr = virt_to_paddr(virt); if (unlikely(dma_debug_disabled())) return; @@ -1529,8 +1498,7 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(s), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1561,8 +1529,7 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, + .paddr = sg_phys(sg), .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1578,6 +1545,49 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } } +void debug_dma_alloc_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs) +{ + struct dma_debug_entry *entry; + + if (unlikely(dma_debug_disabled())) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_noncoherent; + entry->dev = dev; + entry->paddr = page_to_phys(page); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = direction; + + add_dma_entry(entry, attrs); +} + +void debug_dma_free_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr) +{ + struct dma_debug_entry ref = { + .type = dma_debug_noncoherent, + .dev = dev, + .paddr = page_to_phys(page), + .dev_addr = dma_addr, + .size = size, + .direction = direction, + }; + + if (unlikely(dma_debug_disabled())) + return; + + check_unmap(&ref); +} + static int __init dma_debug_driver_setup(char *str) { int i; diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h index f525197d3cae..da7be0bddcf6 100644 --- a/kernel/dma/debug.h +++ b/kernel/dma/debug.h @@ -9,12 +9,11 @@ #define _KERNEL_DMA_DEBUG_H #ifdef CONFIG_DMA_API_DEBUG -extern void debug_dma_map_page(struct device *dev, struct page *page, - size_t offset, size_t size, - int direction, dma_addr_t dma_addr, +extern void debug_dma_map_phys(struct device *dev, phys_addr_t phys, + size_t size, int direction, dma_addr_t dma_addr, unsigned long attrs); -extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, +extern void debug_dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, int direction); extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, @@ -31,14 +30,6 @@ extern void debug_dma_alloc_coherent(struct device *dev, size_t size, extern void debug_dma_free_coherent(struct device *dev, size_t size, void *virt, dma_addr_t addr); -extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr, - size_t size, int direction, - dma_addr_t dma_addr, - unsigned long attrs); - -extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction); - extern void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction); @@ -54,15 +45,21 @@ extern void debug_dma_sync_sg_for_cpu(struct device *dev, extern void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction); +extern void debug_dma_alloc_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs); +extern void debug_dma_free_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr); #else /* CONFIG_DMA_API_DEBUG */ -static inline void debug_dma_map_page(struct device *dev, struct page *page, - size_t offset, size_t size, - int direction, dma_addr_t dma_addr, - unsigned long attrs) +static inline void debug_dma_map_phys(struct device *dev, phys_addr_t phys, + size_t size, int direction, + dma_addr_t dma_addr, unsigned long attrs) { } -static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, +static inline void debug_dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, int direction) { } @@ -90,19 +87,6 @@ static inline void debug_dma_free_coherent(struct device *dev, size_t size, { } -static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr, - size_t size, int direction, - dma_addr_t dma_addr, - unsigned long attrs) -{ -} - -static inline void debug_dma_unmap_resource(struct device *dev, - dma_addr_t dma_addr, size_t size, - int direction) -{ -} - static inline void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) @@ -126,5 +110,18 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev, int nelems, int direction) { } + +static inline void debug_dma_alloc_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr, + unsigned long attrs) +{ +} + +static inline void debug_dma_free_pages(struct device *dev, struct page *page, + size_t size, int direction, + dma_addr_t dma_addr) +{ +} #endif /* CONFIG_DMA_API_DEBUG */ #endif /* _KERNEL_DMA_DEBUG_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 9596ae1aa0da..50c3fe2a1d55 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -13,6 +13,7 @@ #include <linux/vmalloc.h> #include <linux/set_memory.h> #include <linux/slab.h> +#include <linux/pci-p2pdma.h> #include "direct.h" /* @@ -20,7 +21,7 @@ * it for entirely different regions. In that case the arch code needs to * override the variable below for dma-direct to work properly. */ -unsigned int zone_dma_bits __ro_after_init = 24; +u64 zone_dma_limit __ro_after_init = DMA_BIT_MASK(24); static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) @@ -59,7 +60,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit) * zones. */ *phys_limit = dma_to_phys(dev, dma_limit); - if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + if (*phys_limit <= zone_dma_limit) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) return GFP_DMA32; @@ -119,7 +120,7 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp, bool allow_highmem) { int node = dev_to_node(dev); - struct page *page = NULL; + struct page *page; u64 phys_limit; WARN_ON_ONCE(!PAGE_ALIGNED(size)); @@ -130,30 +131,25 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); if (page) { - if (!dma_coherent_ok(dev, page_to_phys(page), size) || - (!allow_highmem && PageHighMem(page))) { - dma_free_contiguous(dev, page, size); - page = NULL; - } - } -again: - if (!page) - page = alloc_pages_node(node, gfp, get_order(size)); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + if (dma_coherent_ok(dev, page_to_phys(page), size) && + (allow_highmem || !PageHighMem(page))) + return page; + dma_free_contiguous(dev, page, size); - page = NULL; + } + + while ((page = alloc_pages_node(node, gfp, get_order(size))) + && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __free_pages(page, get_order(size)); if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && - !(gfp & (GFP_DMA32 | GFP_DMA))) { + !(gfp & (GFP_DMA32 | GFP_DMA))) gfp |= GFP_DMA32; - goto again; - } - - if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) { + else if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } + else + return NULL; } return page; @@ -220,13 +216,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp); if (!dev_is_dma_coherent(dev)) { - /* - * Fallback to the arch handler if it exists. This should - * eventually go away. - */ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && !is_swiotlb_for_alloc(dev)) return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); @@ -240,27 +230,24 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_handle); /* - * Otherwise remap if the architecture is asking for it. But - * given that remapping memory is a blocking operation we'll - * instead have to dip into the atomic pools. + * Otherwise we require the architecture to either be able to + * mark arbitrary parts of the kernel direct mapping uncached, + * or remapped it uncached. */ + set_uncached = IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED); remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP); - if (remap) { - if (dma_direct_use_pool(dev, gfp)) - return dma_direct_alloc_from_pool(dev, size, - dma_handle, gfp); - } else { - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED)) - return NULL; - set_uncached = true; + if (!set_uncached && !remap) { + pr_warn_once("coherent DMA allocations not supported on this platform.\n"); + return NULL; } } /* - * Decrypting memory may block, so allocate the memory from the atomic - * pools if we can't block. + * Remapping or decrypting memory may block, allocate the memory from + * the atomic pools instead if we aren't allowed block. */ - if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp)) + if ((remap || force_dma_unencrypted(dev)) && + dma_direct_use_pool(dev, gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ @@ -295,7 +282,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, } else { ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + goto out_leak_pages; } memset(ret, 0, size); @@ -316,6 +303,8 @@ out_encrypt_pages: out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; +out_leak_pages: + return NULL; } void dma_direct_free(struct device *dev, size_t size, @@ -330,9 +319,7 @@ void dma_direct_free(struct device *dev, size_t size, return; } - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - !IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) && + if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) && !dev_is_dma_coherent(dev) && !is_swiotlb_for_alloc(dev)) { arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); @@ -378,12 +365,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, ret = page_address(page); if (dma_set_decrypted(dev, ret, size)) - goto out_free_pages; + goto out_leak_pages; memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; -out_free_pages: - __dma_direct_free_pages(dev, page, size); +out_leak_pages: return NULL; } @@ -414,9 +400,7 @@ void dma_direct_sync_sg_for_device(struct device *dev, for_each_sg(sgl, sg, nents, i) { phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_device(dev, paddr, sg->length, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, sg->length, @@ -440,9 +424,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_cpu(paddr, sg->length, dir); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, sg->length, - dir); + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, sg->length); @@ -466,7 +448,7 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, if (sg_dma_is_bus_address(sg)) sg_dma_unmark_bus_address(sg); else - dma_direct_unmap_page(dev, sg->dma_address, + dma_direct_unmap_phys(dev, sg->dma_address, sg_dma_len(sg), dir, attrs); } } @@ -476,34 +458,34 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct pci_p2pdma_map_state p2pdma_state = {}; - enum pci_p2pdma_map_type map; struct scatterlist *sg; int i, ret; for_each_sg(sgl, sg, nents, i) { - if (is_pci_p2pdma_page(sg_page(sg))) { - map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg); - switch (map) { - case PCI_P2PDMA_MAP_BUS_ADDR: - continue; - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - /* - * Any P2P mapping that traverses the PCI - * host bridge must be mapped with CPU physical - * address and not PCI bus addresses. This is - * done with dma_direct_map_page() below. - */ - break; - default: - ret = -EREMOTEIO; + switch (pci_p2pdma_state(&p2pdma_state, dev, sg_page(sg))) { + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Any P2P mapping that traverses the PCI host bridge + * must be mapped with CPU physical address and not PCI + * bus addresses. + */ + break; + case PCI_P2PDMA_MAP_NONE: + sg->dma_address = dma_direct_map_phys(dev, sg_phys(sg), + sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) { + ret = -EIO; goto out_unmap; } - } - - sg->dma_address = dma_direct_map_page(dev, sg_page(sg), - sg->offset, sg->length, dir, attrs); - if (sg->dma_address == DMA_MAPPING_ERROR) { - ret = -EIO; + break; + case PCI_P2PDMA_MAP_BUS_ADDR: + sg->dma_address = pci_p2pdma_bus_addr_map( + p2pdma_state.mem, sg_phys(sg)); + sg_dma_len(sg) = sg->length; + sg_dma_mark_bus_address(sg); + continue; + default: + ret = -EREMOTEIO; goto out_unmap; } sg_dma_len(sg) = sg->length; @@ -516,22 +498,6 @@ out_unmap: return ret; } -dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t dma_addr = paddr; - - if (unlikely(!dma_capable(dev, dma_addr, size, false))) { - dev_err_once(dev, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - WARN_ON_ONCE(1); - return DMA_MAPPING_ERROR; - } - - return dma_addr; -} - int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) @@ -594,10 +560,58 @@ int dma_direct_supported(struct device *dev, u64 mask) * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); + min_mask = min_t(u64, min_mask, zone_dma_limit); return mask >= phys_to_dma_unencrypted(dev, min_mask); } +static const struct bus_dma_region *dma_find_range(struct device *dev, + unsigned long start_pfn) +{ + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) { + unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start); + + if (start_pfn >= cpu_start_pfn && + start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) + return m; + } + + return NULL; +} + +/* + * To check whether all ram resource ranges are covered by dma range map + * Returns 0 when further check is needed + * Returns 1 if there is some RAM range can't be covered by dma_range_map + */ +static int check_ram_in_range_map(unsigned long start_pfn, + unsigned long nr_pages, void *data) +{ + unsigned long end_pfn = start_pfn + nr_pages; + struct device *dev = data; + + while (start_pfn < end_pfn) { + const struct bus_dma_region *bdr; + + bdr = dma_find_range(dev, start_pfn); + if (!bdr) + return 1; + + start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size); + } + + return 0; +} + +bool dma_direct_all_ram_mapped(struct device *dev) +{ + if (!dev->dma_range_map) + return true; + return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev, + check_ram_in_range_map); +} + size_t dma_direct_max_mapping_size(struct device *dev) { /* If SWIOTLB is active, use its maximum mapping size */ @@ -610,7 +624,7 @@ size_t dma_direct_max_mapping_size(struct device *dev) bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) { return !dev_is_dma_coherent(dev) || - is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr)); + swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr)); } /** @@ -648,7 +662,6 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, return -ENOMEM; map[0].cpu_start = cpu_start; map[0].dma_start = dma_start; - map[0].offset = offset; map[0].size = size; dev->dma_range_map = map; return 0; diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h index 97ec892ea0b5..da2fadf45bcd 100644 --- a/kernel/dma/direct.h +++ b/kernel/dma/direct.h @@ -20,6 +20,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs); +bool dma_direct_all_ram_mapped(struct device *dev); size_t dma_direct_max_mapping_size(struct device *dev); #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ @@ -57,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev, { phys_addr_t paddr = dma_to_phys(dev, addr); - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_device(dev, paddr, size, dir); + swiotlb_sync_single_for_device(dev, paddr, size, dir); if (!dev_is_dma_coherent(dev)) arch_sync_dma_for_device(paddr, size, dir); @@ -74,54 +74,67 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, arch_sync_dma_for_cpu_all(); } - if (unlikely(is_swiotlb_buffer(dev, paddr))) - swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); if (dir == DMA_FROM_DEVICE) arch_dma_mark_clean(paddr, size); } -static inline dma_addr_t dma_direct_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) +static inline dma_addr_t dma_direct_map_phys(struct device *dev, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) { - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dma_addr = phys_to_dma(dev, phys); + dma_addr_t dma_addr; if (is_swiotlb_force_bounce(dev)) { - if (is_pci_p2pdma_page(page)) - return DMA_MAPPING_ERROR; + if (attrs & DMA_ATTR_MMIO) + goto err_overflow; + return swiotlb_map(dev, phys, size, dir, attrs); } - if (unlikely(!dma_capable(dev, dma_addr, size, true)) || - dma_kmalloc_needs_bounce(dev, size, dir)) { - if (is_pci_p2pdma_page(page)) - return DMA_MAPPING_ERROR; - if (is_swiotlb_active(dev)) - return swiotlb_map(dev, phys, size, dir, attrs); - - dev_WARN_ONCE(dev, 1, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - return DMA_MAPPING_ERROR; + if (attrs & DMA_ATTR_MMIO) { + dma_addr = phys; + if (unlikely(!dma_capable(dev, dma_addr, size, false))) + goto err_overflow; + } else { + dma_addr = phys_to_dma(dev, phys); + if (unlikely(!dma_capable(dev, dma_addr, size, true)) || + dma_kmalloc_needs_bounce(dev, size, dir)) { + if (is_swiotlb_active(dev)) + return swiotlb_map(dev, phys, size, dir, attrs); + + goto err_overflow; + } } - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + if (!dev_is_dma_coherent(dev) && + !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) arch_sync_dma_for_device(phys, size, dir); return dma_addr; + +err_overflow: + dev_WARN_ONCE( + dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; } -static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, +static inline void dma_direct_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - phys_addr_t phys = dma_to_phys(dev, addr); + phys_addr_t phys; + + if (attrs & DMA_ATTR_MMIO) + /* nothing to do: uncached and no swiotlb */ + return; + phys = dma_to_phys(dev, addr); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); - if (unlikely(is_swiotlb_buffer(dev, phys))) - swiotlb_tbl_unmap_single(dev, phys, size, dir, + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); } #endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index b492d59ac77e..16a51736a2a3 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -11,12 +11,20 @@ static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; } -static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +static dma_addr_t dma_dummy_map_phys(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) { return DMA_MAPPING_ERROR; } +static void dma_dummy_unmap_phys(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_phys, so unmap_page should never be + * called. + */ + WARN_ON_ONCE(true); +} static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, @@ -25,6 +33,16 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, return -EINVAL; } +static void dma_dummy_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_sg, so unmap_sg should never be called. + */ + WARN_ON_ONCE(true); +} + static int dma_dummy_supported(struct device *hwdev, u64 mask) { return 0; @@ -32,7 +50,9 @@ static int dma_dummy_supported(struct device *hwdev, u64 mask) const struct dma_map_ops dma_dummy_ops = { .mmap = dma_dummy_mmap, - .map_page = dma_dummy_map_page, + .map_phys = dma_dummy_map_phys, + .unmap_phys = dma_dummy_unmap_phys, .map_sg = dma_dummy_map_sg, + .unmap_sg = dma_dummy_unmap_sg, .dma_supported = dma_dummy_supported, }; diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 02205ab53b7e..794041a39e65 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -11,13 +11,13 @@ #include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/kthread.h> -#include <linux/map_benchmark.h> #include <linux/math64.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/timekeeping.h> +#include <uapi/linux/map_benchmark.h> struct map_benchmark_data { struct map_benchmark bparam; @@ -89,6 +89,22 @@ static int map_benchmark_thread(void *data) atomic64_add(map_sq, &map->sum_sq_map); atomic64_add(unmap_sq, &map->sum_sq_unmap); atomic64_inc(&map->loops); + + /* + * We may test for a long time so periodically check whether + * we need to schedule to avoid starving the others. Otherwise + * we may hangup the kernel in a non-preemptible kernel when + * the test kthreads number >= CPU number, the test kthreads + * will run endless on every CPU since the thread resposible + * for notifying the kthread stop (in do_map_benchmark()) + * could not be scheduled. + * + * Note this may degrade the test concurrency since the test + * threads may need to share the CPU time with other load + * in the system. So it's recommended to run this benchmark + * on an idle system. + */ + cond_resched(); } out: @@ -101,7 +117,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) struct task_struct **tsk; int threads = map->bparam.threads; int node = map->bparam.node; - const cpumask_t *cpu_mask = cpumask_of_node(node); u64 loops; int ret = 0; int i; @@ -118,11 +133,13 @@ static int do_map_benchmark(struct map_benchmark_data *map) if (IS_ERR(tsk[i])) { pr_err("create dma_map thread failed\n"); ret = PTR_ERR(tsk[i]); + while (--i >= 0) + kthread_stop(tsk[i]); goto out; } if (node != NUMA_NO_NODE) - kthread_bind_mask(tsk[i], cpu_mask); + kthread_bind_mask(tsk[i], cpumask_of_node(node)); } /* clear the old value in the previous benchmark */ @@ -139,13 +156,17 @@ static int do_map_benchmark(struct map_benchmark_data *map) msleep_interruptible(map->bparam.seconds * 1000); - /* wait for the completion of benchmark threads */ + /* wait for the completion of all started benchmark threads */ for (i = 0; i < threads; i++) { - ret = kthread_stop(tsk[i]); - if (ret) - goto out; + int kthread_ret = kthread_stop_put(tsk[i]); + + if (kthread_ret) + ret = kthread_ret; } + if (ret) + goto out; + loops = atomic64_read(&map->loops); if (likely(loops > 0)) { u64 map_variance, unmap_variance; @@ -170,8 +191,6 @@ static int do_map_benchmark(struct map_benchmark_data *map) } out: - for (i = 0; i < threads; i++) - put_task_struct(tsk[i]); put_device(map->dev); kfree(tsk); return ret; @@ -208,7 +227,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, } if (map->bparam.node != NUMA_NO_NODE && - !node_possible(map->bparam.node)) { + (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES || + !node_possible(map->bparam.node))) { pr_err("invalid numa node\n"); return -EINVAL; } @@ -252,6 +272,9 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, * dma_mask changed by benchmark */ dma_set_mask(map->dev, old_dma_mask); + + if (ret) + return ret; break; default: return -EINVAL; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index e323ca48f7f2..37163eb49f9f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -10,6 +10,7 @@ #include <linux/dma-map-ops.h> #include <linux/export.h> #include <linux/gfp.h> +#include <linux/iommu-dma.h> #include <linux/kmsan.h> #include <linux/of_device.h> #include <linux/slab.h> @@ -17,6 +18,9 @@ #include "debug.h" #include "direct.h" +#define CREATE_TRACE_POINTS +#include <trace/events/dma.h> + #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) @@ -67,8 +71,8 @@ void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, { struct dma_devres match_data = { size, vaddr, dma_handle }; - dma_free_coherent(dev, size, vaddr, dma_handle); WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data)); + dma_free_coherent(dev, size, vaddr, dma_handle); } EXPORT_SYMBOL(dmam_free_coherent); @@ -116,8 +120,12 @@ EXPORT_SYMBOL(dmam_alloc_attrs); static bool dma_go_direct(struct device *dev, dma_addr_t mask, const struct dma_map_ops *ops) { + if (use_dma_iommu(dev)) + return false; + if (likely(!ops)) return true; + #ifdef CONFIG_DMA_OPS_BYPASS if (dev->dma_ops_bypass) return min_not_zero(mask, dev->bus_dma_limit) >= @@ -144,12 +152,12 @@ static inline bool dma_map_direct(struct device *dev, return dma_go_direct(dev, *dev->dma_mask, ops); } -dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, - size_t offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t dma_map_phys(struct device *dev, phys_addr_t phys, size_t size, + enum dma_data_direction dir, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; + bool is_mmio = attrs & DMA_ATTR_MMIO; + dma_addr_t addr = DMA_MAPPING_ERROR; BUG_ON(!valid_dma_direction(dir)); @@ -157,29 +165,65 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, return DMA_MAPPING_ERROR; if (dma_map_direct(dev, ops) || - arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - else - addr = ops->map_page(dev, page, offset, size, dir, attrs); - kmsan_handle_dma(page, offset, size, dir); - debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); + (!is_mmio && arch_dma_map_phys_direct(dev, phys + size))) + addr = dma_direct_map_phys(dev, phys, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_phys(dev, phys, size, dir, attrs); + else if (ops->map_phys) + addr = ops->map_phys(dev, phys, size, dir, attrs); + + if (!is_mmio) + kmsan_handle_dma(phys, size, dir); + trace_dma_map_phys(dev, phys, addr, size, dir, attrs); + debug_dma_map_phys(dev, phys, size, dir, addr, attrs); return addr; } +EXPORT_SYMBOL_GPL(dma_map_phys); + +dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + + if (unlikely(attrs & DMA_ATTR_MMIO)) + return DMA_MAPPING_ERROR; + + if (IS_ENABLED(CONFIG_DMA_API_DEBUG) && + WARN_ON_ONCE(is_zone_device_page(page))) + return DMA_MAPPING_ERROR; + + return dma_map_phys(dev, phys, size, dir, attrs); +} EXPORT_SYMBOL(dma_map_page_attrs); -void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, +void dma_unmap_phys(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); + bool is_mmio = attrs & DMA_ATTR_MMIO; BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops) || - arch_dma_unmap_page_direct(dev, addr + size)) - dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir); + (!is_mmio && arch_dma_unmap_phys_direct(dev, addr + size))) + dma_direct_unmap_phys(dev, addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_unmap_phys(dev, addr, size, dir, attrs); + else if (ops->unmap_phys) + ops->unmap_phys(dev, addr, size, dir, attrs); + trace_dma_unmap_phys(dev, addr, size, dir, attrs); + debug_dma_unmap_phys(dev, addr, size, dir); +} +EXPORT_SYMBOL_GPL(dma_unmap_phys); + +void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + if (unlikely(attrs & DMA_ATTR_MMIO)) + return; + + dma_unmap_phys(dev, addr, size, dir, attrs); } EXPORT_SYMBOL(dma_unmap_page_attrs); @@ -197,14 +241,18 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + ents = iommu_dma_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); if (ents > 0) { kmsan_handle_dma_sg(sg, nents, dir); + trace_dma_map_sg(dev, sg, nents, ents, dir, attrs); debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && ents != -EIO && ents != -EREMOTEIO)) { + trace_dma_map_sg_err(dev, sg, nents, ents, dir, attrs); return -EIO; } @@ -287,10 +335,13 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); + trace_dma_unmap_sg(dev, sg, nents, dir, attrs); debug_dma_unmap_sg(dev, sg, nents, dir); if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } @@ -299,37 +350,23 @@ EXPORT_SYMBOL(dma_unmap_sg_attrs); dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr = DMA_MAPPING_ERROR; - - BUG_ON(!valid_dma_direction(dir)); - - if (WARN_ON_ONCE(!dev->dma_mask)) + if (IS_ENABLED(CONFIG_DMA_API_DEBUG) && + WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) return DMA_MAPPING_ERROR; - if (dma_map_direct(dev, ops)) - addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); - else if (ops->map_resource) - addr = ops->map_resource(dev, phys_addr, size, dir, attrs); - - debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs); - return addr; + return dma_map_phys(dev, phys_addr, size, dir, attrs | DMA_ATTR_MMIO); } EXPORT_SYMBOL(dma_map_resource); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (!dma_map_direct(dev, ops) && ops->unmap_resource) - ops->unmap_resource(dev, addr, size, dir, attrs); - debug_dma_unmap_resource(dev, addr, size, dir); + dma_unmap_phys(dev, addr, size, dir, attrs | DMA_ATTR_MMIO); } EXPORT_SYMBOL(dma_unmap_resource); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, +#ifdef CONFIG_DMA_NEED_SYNC +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -337,13 +374,16 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); + trace_dma_sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_cpu); +EXPORT_SYMBOL(__dma_sync_single_for_cpu); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -351,13 +391,16 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); + trace_dma_sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); } -EXPORT_SYMBOL(dma_sync_single_for_device); +EXPORT_SYMBOL(__dma_sync_single_for_device); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -365,13 +408,16 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); + trace_dma_sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); +EXPORT_SYMBOL(__dma_sync_sg_for_cpu); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -379,11 +425,72 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); + trace_dma_sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } -EXPORT_SYMBOL(dma_sync_sg_for_device); +EXPORT_SYMBOL(__dma_sync_sg_for_device); + +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + /* + * dma_skip_sync could've been reset on first SWIOTLB buffer + * mapping, but @dma_addr is not necessary an SWIOTLB buffer. + * In this case, fall back to more granular check. + */ + return dma_direct_need_sync(dev, dma_addr); + return true; +} +EXPORT_SYMBOL_GPL(__dma_need_sync); + +/** + * dma_need_unmap - does this device need dma_unmap_* operations + * @dev: device to check + * + * If this function returns %false, drivers can skip calling dma_unmap_* after + * finishing an I/O. This function must be called after all mappings that might + * need to be unmapped have been performed. + */ +bool dma_need_unmap(struct device *dev) +{ + if (!dma_map_direct(dev, get_dma_ops(dev))) + return true; + if (!dev->dma_skip_sync) + return true; + return IS_ENABLED(CONFIG_DMA_API_DEBUG); +} +EXPORT_SYMBOL_GPL(dma_need_unmap); + +static void dma_setup_need_sync(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops) || use_dma_iommu(dev)) + /* + * dma_skip_sync will be reset to %false on first SWIOTLB buffer + * mapping, if any. During the device initialization, it's + * enough to check only for the DMA coherence. + */ + dev->dma_skip_sync = dev_is_dma_coherent(dev); + else if (!ops->sync_single_for_device && !ops->sync_single_for_cpu && + !ops->sync_sg_for_device && !ops->sync_sg_for_cpu) + /* + * Synchronization is not possible when none of DMA sync ops + * is set. + */ + dev->dma_skip_sync = true; + else + dev->dma_skip_sync = false; +} +#else /* !CONFIG_DMA_NEED_SYNC */ +static inline void dma_setup_need_sync(struct device *dev) { } +#endif /* !CONFIG_DMA_NEED_SYNC */ /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems @@ -405,6 +512,9 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, if (dma_alloc_direct(dev, ops)) return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); if (!ops->get_sgtable) return -ENXIO; return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); @@ -441,6 +551,8 @@ bool dma_can_mmap(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_can_mmap(dev); + if (use_dma_iommu(dev)) + return true; return ops->mmap != NULL; } EXPORT_SYMBOL_GPL(dma_can_mmap); @@ -467,6 +579,9 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_alloc_direct(dev, ops)) return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); if (!ops->mmap) return -ENXIO; return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -479,6 +594,10 @@ u64 dma_get_required_mask(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_get_required_mask(dev); + + if (use_dma_iommu(dev)) + return DMA_BIT_MASK(32); + if (ops->get_required_mask) return ops->get_required_mask(dev); @@ -510,19 +629,29 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (WARN_ON_ONCE(flag & __GFP_COMP)) return NULL; - if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) + if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) { + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, + DMA_BIDIRECTIONAL, flag, attrs); return cpu_addr; + } /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (dma_alloc_direct(dev, ops)) + if (dma_alloc_direct(dev, ops)) { cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); - else if (ops->alloc) + } else if (use_dma_iommu(dev)) { + cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); + } else if (ops->alloc) { cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); - else + } else { + trace_dma_alloc(dev, NULL, 0, size, DMA_BIDIRECTIONAL, flag, + attrs); return NULL; + } + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, DMA_BIDIRECTIONAL, + flag, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); return cpu_addr; } @@ -544,12 +673,16 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, */ WARN_ON(irqs_disabled()); + trace_dma_free(dev, cpu_addr, dma_handle, size, DMA_BIDIRECTIONAL, + attrs); if (!cpu_addr) return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); else if (ops->free) ops->free(dev, size, cpu_addr, dma_handle, attrs); } @@ -570,9 +703,11 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!ops->alloc_pages) + if (use_dma_iommu(dev)) + return dma_common_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!ops->alloc_pages_op) return NULL; - return ops->alloc_pages(dev, size, dma_handle, dir, gfp); + return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); } struct page *dma_alloc_pages(struct device *dev, size_t size, @@ -580,8 +715,13 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, { struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); - if (page) - debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); + if (page) { + trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle, + size, dir, gfp, 0); + debug_dma_alloc_pages(dev, page, size, dir, *dma_handle, 0); + } else { + trace_dma_alloc_pages(dev, NULL, 0, size, dir, gfp, 0); + } return page; } EXPORT_SYMBOL_GPL(dma_alloc_pages); @@ -594,6 +734,8 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (use_dma_iommu(dev)) + dma_common_free_pages(dev, size, page, dma_handle, dir); else if (ops->free_pages) ops->free_pages(dev, size, page, dma_handle, dir); } @@ -601,7 +743,8 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir) { - debug_dma_unmap_page(dev, dma_handle, size, dir); + trace_dma_free_pages(dev, page_to_virt(page), dma_handle, size, dir, 0); + debug_dma_free_pages(dev, page, size, dir, dma_handle); __dma_free_pages(dev, size, page, dma_handle, dir); } EXPORT_SYMBOL_GPL(dma_free_pages); @@ -646,7 +789,6 @@ out_free_sgt: struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) { - const struct dma_map_ops *ops = get_dma_ops(dev); struct sg_table *sgt; if (WARN_ON_ONCE(attrs & ~DMA_ATTR_ALLOC_SINGLE_PAGES)) @@ -654,14 +796,17 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (WARN_ON_ONCE(gfp & __GFP_COMP)) return NULL; - if (ops && ops->alloc_noncontiguous) - sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); + if (use_dma_iommu(dev)) + sgt = iommu_dma_alloc_noncontiguous(dev, size, dir, gfp, attrs); else sgt = alloc_single_sgt(dev, size, dir, gfp); if (sgt) { sgt->nents = 1; + trace_dma_alloc_sgt(dev, sgt, size, dir, gfp, attrs); debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); + } else { + trace_dma_alloc_sgt_err(dev, NULL, 0, size, dir, gfp, attrs); } return sgt; } @@ -679,11 +824,11 @@ static void free_single_sgt(struct device *dev, size_t size, void dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { - const struct dma_map_ops *ops = get_dma_ops(dev); - + trace_dma_free_sgt(dev, sgt, size, dir); debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); - if (ops && ops->free_noncontiguous) - ops->free_noncontiguous(dev, size, sgt, dir); + + if (use_dma_iommu(dev)) + iommu_dma_free_noncontiguous(dev, size, sgt, dir); else free_single_sgt(dev, size, sgt, dir); } @@ -692,37 +837,26 @@ EXPORT_SYMBOL_GPL(dma_free_noncontiguous); void *dma_vmap_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt) { - const struct dma_map_ops *ops = get_dma_ops(dev); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - if (ops && ops->alloc_noncontiguous) - return vmap(sgt_handle(sgt)->pages, count, VM_MAP, PAGE_KERNEL); + if (use_dma_iommu(dev)) + return iommu_dma_vmap_noncontiguous(dev, size, sgt); + return page_address(sg_page(sgt->sgl)); } EXPORT_SYMBOL_GPL(dma_vmap_noncontiguous); void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (ops && ops->alloc_noncontiguous) - vunmap(vaddr); + if (use_dma_iommu(dev)) + iommu_dma_vunmap_noncontiguous(dev, vaddr); } EXPORT_SYMBOL_GPL(dma_vunmap_noncontiguous); int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, size_t size, struct sg_table *sgt) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (ops && ops->alloc_noncontiguous) { - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - if (vma->vm_pgoff >= count || - vma_pages(vma) > count - vma->vm_pgoff) - return -ENXIO; - return vm_map_pages(vma, sgt_handle(sgt)->pages, count); - } + if (use_dma_iommu(dev)) + return iommu_dma_mmap_noncontiguous(dev, vma, size, sgt); return dma_mmap_pages(dev, vma, size, sg_page(sgt->sgl)); } EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous); @@ -731,32 +865,37 @@ static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (use_dma_iommu(dev)) { + if (WARN_ON(ops)) + return false; + return true; + } + /* - * ->dma_supported sets the bypass flag, so we must always call - * into the method here unless the device is truly direct mapped. + * ->dma_supported sets and clears the bypass flag, so ignore it here + * and always call into the method if there is one. */ - if (!ops) - return dma_direct_supported(dev, mask); - if (!ops->dma_supported) - return 1; - return ops->dma_supported(dev, mask); + if (ops) { + if (!ops->dma_supported) + return true; + return ops->dma_supported(dev, mask); + } + + return dma_direct_supported(dev, mask); } bool dma_pci_p2pdma_supported(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - /* if ops is not set, dma direct will be used which supports P2PDMA */ - if (!ops) - return true; - /* * Note: dma_ops_bypass is not checked here because P2PDMA should * not be used with dma mapping ops that do not have support even * if the specific device is bypassing them. */ - return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; + /* if ops is not set, dma direct and default IOMMU support P2PDMA */ + return !ops; } EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); @@ -773,6 +912,8 @@ int dma_set_mask(struct device *dev, u64 mask) arch_dma_set_mask(dev, mask); *dev->dma_mask = mask; + dma_setup_need_sync(dev); + return 0; } EXPORT_SYMBOL(dma_set_mask); @@ -793,6 +934,37 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +static bool __dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops) || use_dma_iommu(dev)) + return false; + return !dma_direct_all_ram_mapped(dev); +} + +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the devices DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. + */ +bool dma_addressing_limited(struct device *dev) +{ + if (!__dma_addressing_limited(dev)) + return false; + + dev_dbg(dev, "device is DMA addressing limited\n"); + return true; +} +EXPORT_SYMBOL_GPL(dma_addressing_limited); + size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -800,6 +972,8 @@ size_t dma_max_mapping_size(struct device *dev) if (dma_map_direct(dev, ops)) size = dma_direct_max_mapping_size(dev); + else if (use_dma_iommu(dev)) + size = iommu_dma_max_mapping_size(dev); else if (ops && ops->max_mapping_size) size = ops->max_mapping_size(dev); @@ -812,27 +986,22 @@ size_t dma_opt_mapping_size(struct device *dev) const struct dma_map_ops *ops = get_dma_ops(dev); size_t size = SIZE_MAX; - if (ops && ops->opt_mapping_size) + if (use_dma_iommu(dev)) + size = iommu_dma_opt_mapping_size(); + else if (ops && ops->opt_mapping_size) size = ops->opt_mapping_size(); return min(dma_max_mapping_size(dev), size); } EXPORT_SYMBOL_GPL(dma_opt_mapping_size); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_map_direct(dev, ops)) - return dma_direct_need_sync(dev, dma_addr); - return ops->sync_single_for_cpu || ops->sync_single_for_device; -} -EXPORT_SYMBOL_GPL(dma_need_sync); - unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (use_dma_iommu(dev)) + return iommu_dma_get_merge_boundary(dev); + if (!ops || !ops->get_merge_boundary) return 0; /* can't merge */ diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index af4a6ef48ce0..20caf9cabf69 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -4,6 +4,7 @@ * the allocated memory contains normal pages in the direct kernel mapping. */ #include <linux/dma-map-ops.h> +#include <linux/iommu-dma.h> static struct page *dma_common_vaddr_to_page(void *cpu_addr) { @@ -63,6 +64,7 @@ struct page *dma_common_alloc_pages(struct device *dev, size_t size, { const struct dma_map_ops *ops = get_dma_ops(dev); struct page *page; + phys_addr_t phys; page = dma_alloc_contiguous(dev, size, gfp); if (!page) @@ -70,8 +72,13 @@ struct page *dma_common_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; - *dma_handle = ops->map_page(dev, page, 0, size, dir, - DMA_ATTR_SKIP_CPU_SYNC); + phys = page_to_phys(page); + if (use_dma_iommu(dev)) + *dma_handle = iommu_dma_map_phys(dev, phys, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else + *dma_handle = ops->map_phys(dev, phys, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); if (*dma_handle == DMA_MAPPING_ERROR) { dma_free_contiguous(dev, page, size); return NULL; @@ -86,8 +93,11 @@ void dma_common_free_pages(struct device *dev, size_t size, struct page *page, { const struct dma_map_ops *ops = get_dma_ops(dev); - if (ops->unmap_page) - ops->unmap_page(dev, dma_handle, size, dir, + if (use_dma_iommu(dev)) + iommu_dma_unmap_phys(dev, dma_handle, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else if (ops->unmap_phys) + ops->unmap_phys(dev, dma_handle, size, dir, DMA_ATTR_SKIP_CPU_SYNC); dma_free_contiguous(dev, page, size); } diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 1acec2e22827..ee45dee33d49 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -70,9 +70,9 @@ static bool cma_in_zone(gfp_t gfp) /* CMA can't cross zone boundaries, see cma_activate_area() */ end = cma_get_base(cma) + size - 1; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) - return end <= DMA_BIT_MASK(zone_dma_bits); + return end <= zone_dma_limit; if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) - return end <= DMA_BIT_MASK(32); + return end <= max(DMA_BIT_MASK(32), zone_dma_limit); return true; } @@ -84,8 +84,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, void *addr; int ret = -ENOMEM; - /* Cannot allocate larger than MAX_ORDER */ - order = min(get_order(pool_size), MAX_ORDER); + /* Cannot allocate larger than MAX_PAGE_ORDER */ + order = min(get_order(pool_size), MAX_PAGE_ORDER); do { pool_size = 1 << (PAGE_SHIFT + order); @@ -102,8 +102,8 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #ifdef CONFIG_DMA_DIRECT_REMAP addr = dma_common_contiguous_remap(page, pool_size, - pgprot_dmacoherent(PAGE_KERNEL), - __builtin_return_address(0)); + pgprot_decrypted(pgprot_dmacoherent(PAGE_KERNEL)), + __builtin_return_address(0)); if (!addr) goto free_page; #else @@ -135,9 +135,9 @@ encrypt_mapping: remove_mapping: #ifdef CONFIG_DMA_DIRECT_REMAP dma_common_free_remap(addr, pool_size); -#endif -free_page: __maybe_unused +free_page: __free_pages(page, order); +#endif out: return ret; } @@ -190,7 +190,7 @@ static int __init dma_atomic_pool_init(void) /* * If coherent_pool was not used on the command line, default the pool - * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER. + * sizes to 128KB per 1GB of memory, min 128KB, max MAX_PAGE_ORDER. */ if (!atomic_pool_size) { unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 27596f3b4aef..b7c1c0c92d0c 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -10,8 +10,10 @@ struct page **dma_common_find_pages(void *cpu_addr) { struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || area->flags != VM_DMA_COHERENT) + if (!area || !(area->flags & VM_DMA_COHERENT)) return NULL; + WARN(area->flags != VM_DMA_COHERENT, + "unexpected flags in area: %p\n", cpu_addr); return area->pages; } @@ -47,7 +49,7 @@ void *dma_common_contiguous_remap(struct page *page, size_t size, if (!pages) return NULL; for (i = 0; i < count; i++) - pages[i] = nth_page(page, i); + pages[i] = page++; vaddr = vmap(pages, count, VM_DMA_COHERENT, prot); kvfree(pages); @@ -61,7 +63,7 @@ void dma_common_free_remap(void *cpu_addr, size_t size) { struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || area->flags != VM_DMA_COHERENT) { + if (!area || !(area->flags & VM_DMA_COHERENT)) { WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 394494a6b1f3..a547c7693135 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -61,19 +61,20 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) - /** * struct io_tlb_slot - IO TLB slot descriptor * @orig_addr: The original address corresponding to a mapped entry. * @alloc_size: Size of the allocated buffer. * @list: The free list describing the number of free entries available * from each index. + * @pad_slots: Number of preceding padding slots. Valid only in the first + * allocated non-padding slot. */ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; - unsigned int list; + unsigned short list; + unsigned short pad_slots; }; static bool swiotlb_force_bounce; @@ -283,9 +284,11 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, } for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); @@ -399,14 +402,13 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, } mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area), - default_nareas), SMP_CACHE_BYTES); + nareas), SMP_CACHE_BYTES); if (!mem->areas) { pr_warn("%s: Failed to allocate mem->areas.\n", __func__); return; } - swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, - default_nareas); + swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas); add_mem_pool(&io_tlb_default_mem, mem); if (flags & SWIOTLB_VERBOSE) @@ -446,9 +448,9 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, if (!remap) io_tlb_default_mem.can_grow = true; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) - io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits); + io_tlb_default_mem.phys_limit = zone_dma_limit; else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) - io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32); + io_tlb_default_mem.phys_limit = max(DMA_BIT_MASK(32), zone_dma_limit); else io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); #endif @@ -559,29 +561,40 @@ void __init swiotlb_exit(void) * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. If successful, make the allocated * pages decrypted that they can be used for DMA. * - * Return: Decrypted pages, or %NULL on failure. + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. */ -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; + phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; - vaddr = page_address(page); + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } + + vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: - __free_pages(page, order); + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); return NULL; } @@ -614,16 +627,12 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, } gfp &= ~GFP_ZONEMASK; - if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + if (phys_limit <= zone_dma_limit) gfp |= __GFP_DMA; else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; - while ((page = alloc_dma_pages(gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) @@ -679,6 +688,11 @@ static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev, size_t pool_size; size_t tlb_size; + if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) { + nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER; + nareas = limit_nareas(nareas, nslabs); + } + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); pool = kzalloc(pool_size, gfp); if (!pool) @@ -729,9 +743,6 @@ static void swiotlb_dyn_alloc(struct work_struct *work) } add_mem_pool(mem, pool); - - /* Pairs with smp_rmb() in is_swiotlb_buffer(). */ - smp_wmb(); } /** @@ -750,16 +761,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu) } /** - * swiotlb_find_pool() - find the IO TLB pool for a physical address + * __swiotlb_find_pool() - find the IO TLB pool for a physical address * @dev: Device which has mapped the DMA buffer. * @paddr: Physical address within the DMA buffer. * * Find the IO TLB memory pool descriptor which contains the given physical - * address, if any. + * address, if any. This function is for use only when the dev is known to + * be using swiotlb. Use swiotlb_find_pool() for the more general case + * when this condition is not met. * * Return: Memory pool which contains @paddr, or %NULL if none. */ -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr) +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; @@ -812,47 +825,60 @@ void swiotlb_dev_init(struct device *dev) #endif } -/* - * Return the offset into a iotlb slot required to keep the device happy. +/** + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. + * @dev: Owning device. + * @align_mask: Allocation alignment mask. + * @addr: DMA address. + * + * Return the minimum offset from the start of an IO TLB allocation which is + * required for a given buffer address and allocation alignment to keep the + * device happy. + * + * First, the address bits covered by min_align_mask must be identical in the + * original address and the bounce buffer address. High bits are preserved by + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra + * padding bytes before the bounce buffer. + * + * Second, @align_mask specifies which bits of the first allocated slot must + * be zero. This may require allocating additional padding slots, and then the + * offset (in bytes) from the first such padding slot is returned. */ -static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +static unsigned int swiotlb_align_offset(struct device *dev, + unsigned int align_mask, u64 addr) { - return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); + return addr & dma_get_min_align_mask(dev) & + (align_mask | (IO_TLB_SIZE - 1)); } /* * Bounce: copy the swiotlb buffer from or back to the original dma location */ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = mem->slots[index].orig_addr; size_t alloc_size = mem->slots[index].alloc_size; unsigned long pfn = PFN_DOWN(orig_addr); unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; - unsigned int tlb_offset, orig_addr_offset; + int tlb_offset; if (orig_addr == INVALID_PHYS_ADDR) return; - tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); - orig_addr_offset = swiotlb_align_offset(dev, orig_addr); - if (tlb_offset < orig_addr_offset) { - dev_WARN_ONCE(dev, 1, - "Access before mapping start detected. orig offset %u, requested offset %u.\n", - orig_addr_offset, tlb_offset); - return; - } - - tlb_offset -= orig_addr_offset; - if (tlb_offset > alloc_size) { - dev_WARN_ONCE(dev, 1, - "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n", - alloc_size, size, tlb_offset); - return; - } + /* + * It's valid for tlb_offset to be negative. This can happen when the + * "offset" returned by swiotlb_align_offset() is non-zero, and the + * tlb_addr is pointing within the first "offset" bytes of the second + * or subsequent slots of the allocated swiotlb area. While it's not + * valid for tlb_addr to be pointing within the first "offset" bytes + * of the first slot, there's no way to check for such an error since + * this function can't distinguish the first slot from the second and + * subsequent slots. + */ + tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(dev, 0, orig_addr); orig_addr += tlb_offset; alloc_size -= tlb_offset; @@ -947,8 +973,30 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) } #endif /* CONFIG_DEBUG_FS */ +#ifdef CONFIG_SWIOTLB_DYNAMIC +#ifdef CONFIG_DEBUG_FS +static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_add(nslots, &mem->transient_nslabs); +} + +static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ + atomic_long_sub(nslots, &mem->transient_nslabs); +} + +#else /* !CONFIG_DEBUG_FS */ +static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots) +{ +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_SWIOTLB_DYNAMIC */ + /** - * swiotlb_area_find_slots() - search for slots in one IO TLB memory area + * swiotlb_search_pool_area() - search one memory area in one pool * @dev: Device which maps the buffer. * @pool: Memory pool to be searched. * @area_index: Index of the IO TLB memory area to be searched. @@ -963,7 +1011,7 @@ static void dec_used(struct io_tlb_mem *mem, unsigned int nslots) * * Return: Index of the first allocated slot, or -1 on error. */ -static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, +static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool, int area_index, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask) { @@ -972,10 +1020,9 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int iotlb_align_mask = - dma_get_min_align_mask(dev) | alloc_align_mask; + unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; @@ -985,18 +1032,29 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, BUG_ON(area_index >= pool->nareas); /* - * For allocations of PAGE_SIZE or larger only look for page aligned - * allocations. + * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be + * page-aligned in the absence of any other alignment requirements. + * 'alloc_align_mask' was later introduced to specify the alignment + * explicitly, however this is passed as zero for streaming mappings + * and so we preserve the old behaviour there in case any drivers are + * relying on it. + */ + if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) + alloc_align_mask = PAGE_SIZE - 1; + + /* + * Ensure that the allocation is at least slot-aligned and update + * 'iotlb_align_mask' to ignore bits that will be preserved when + * offsetting into the allocation. */ - if (alloc_size >= PAGE_SIZE) - iotlb_align_mask |= ~PAGE_MASK; - iotlb_align_mask &= ~(IO_TLB_SIZE - 1); + alloc_align_mask |= (IO_TLB_SIZE - 1); + iotlb_align_mask &= ~alloc_align_mask; /* * For mappings with an alignment requirement don't bother looping to * unaligned slots once we found an aligned one. */ - stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; + stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > pool->area_nslabs - area->used)) @@ -1006,11 +1064,14 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, index = area->index; for (slots_checked = 0; slots_checked < pool->area_nslabs; ) { + phys_addr_t tlb_addr; + slot_index = slot_base + index; + tlb_addr = slot_addr(tbl_dma_addr, slot_index); - if (orig_addr && - (slot_addr(tbl_dma_addr, slot_index) & - iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { + if ((tlb_addr & alloc_align_mask) || + (orig_addr && (tlb_addr & iotlb_align_mask) != + (orig_addr & iotlb_align_mask))) { index = wrap_area_index(pool, index + 1); slots_checked++; continue; @@ -1057,41 +1118,50 @@ found: return slot_index; } +#ifdef CONFIG_SWIOTLB_DYNAMIC + /** - * swiotlb_pool_find_slots() - search for slots in one memory pool + * swiotlb_search_area() - search one memory area in all pools * @dev: Device which maps the buffer. - * @pool: Memory pool to be searched. + * @start_cpu: Start CPU number. + * @cpu_offset: Offset from @start_cpu. * @orig_addr: Original (non-bounced) IO buffer address. * @alloc_size: Total requested size of the bounce buffer, * including initial alignment padding. * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. * - * Search through one memory pool to find a sequence of slots that match the + * Search one memory area in all pools for a sequence of slots that match the * allocation constraints. * * Return: Index of the first allocated slot, or -1 on error. */ -static int swiotlb_pool_find_slots(struct device *dev, struct io_tlb_pool *pool, - phys_addr_t orig_addr, size_t alloc_size, - unsigned int alloc_align_mask) +static int swiotlb_search_area(struct device *dev, int start_cpu, + int cpu_offset, phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { - int start = raw_smp_processor_id() & (pool->nareas - 1); - int i = start, index; - - do { - index = swiotlb_area_find_slots(dev, pool, i, orig_addr, - alloc_size, alloc_align_mask); - if (index >= 0) - return index; - if (++i >= pool->nareas) - i = 0; - } while (i != start); + struct io_tlb_mem *mem = dev->dma_io_tlb_mem; + struct io_tlb_pool *pool; + int area_index; + int index = -1; - return -1; + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) { + if (cpu_offset >= pool->nareas) + continue; + area_index = (start_cpu + cpu_offset) & (pool->nareas - 1); + index = swiotlb_search_pool_area(dev, pool, area_index, + orig_addr, alloc_size, + alloc_align_mask); + if (index >= 0) { + *retpool = pool; + break; + } + } + rcu_read_unlock(); + return index; } -#ifdef CONFIG_SWIOTLB_DYNAMIC - /** * swiotlb_find_slots() - search for slots in the whole swiotlb * @dev: Device which maps the buffer. @@ -1115,18 +1185,20 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, unsigned long nslabs; unsigned long flags; u64 phys_limit; + int cpu, i; int index; - rcu_read_lock(); - list_for_each_entry_rcu(pool, &mem->pools, node) { - index = swiotlb_pool_find_slots(dev, pool, orig_addr, - alloc_size, alloc_align_mask); - if (index >= 0) { - rcu_read_unlock(); + if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE) + return -1; + + cpu = raw_smp_processor_id(); + for (i = 0; i < default_nareas; ++i) { + index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size, + alloc_align_mask, &pool); + if (index >= 0) goto found; - } } - rcu_read_unlock(); + if (!mem->can_grow) return -1; @@ -1135,12 +1207,12 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, nslabs = nr_slots(alloc_size); phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit); pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit, - GFP_NOWAIT | __GFP_NOWARN); + GFP_NOWAIT); if (!pool) return -1; - index = swiotlb_pool_find_slots(dev, pool, orig_addr, - alloc_size, alloc_align_mask); + index = swiotlb_search_pool_area(dev, pool, 0, orig_addr, + alloc_size, alloc_align_mask); if (index < 0) { swiotlb_dyn_free(&pool->rcu); return -1; @@ -1150,11 +1222,29 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, spin_lock_irqsave(&dev->dma_io_tlb_lock, flags); list_add_rcu(&pool->node, &dev->dma_io_tlb_pools); spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags); + inc_transient_used(mem, pool->nslabs); found: - dev->dma_uses_io_tlb = true; - /* Pairs with smp_rmb() in is_swiotlb_buffer() */ - smp_wmb(); + WRITE_ONCE(dev->dma_uses_io_tlb, true); + + /* + * The general barrier orders reads and writes against a presumed store + * of the SWIOTLB buffer address by a device driver (to a driver private + * data structure). It serves two purposes. + * + * First, the store to dev->dma_uses_io_tlb must be ordered before the + * presumed store. This guarantees that the returned buffer address + * cannot be passed to another CPU before updating dev->dma_uses_io_tlb. + * + * Second, the load from mem->pools must be ordered before the same + * presumed store. This guarantees that the returned buffer address + * cannot be observed by another CPU before an update of the RCU list + * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy + * atomicity). + * + * See also the comment in swiotlb_find_pool(). + */ + smp_mb(); *retpool = pool; return index; @@ -1166,9 +1256,21 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr, size_t alloc_size, unsigned int alloc_align_mask, struct io_tlb_pool **retpool) { - *retpool = &dev->dma_io_tlb_mem->defpool; - return swiotlb_pool_find_slots(dev, *retpool, - orig_addr, alloc_size, alloc_align_mask); + struct io_tlb_pool *pool; + int start, i; + int index; + + *retpool = pool = &dev->dma_io_tlb_mem->defpool; + i = start = raw_smp_processor_id() & (pool->nareas - 1); + do { + index = swiotlb_search_pool_area(dev, pool, i, orig_addr, + alloc_size, alloc_align_mask); + if (index >= 0) + return index; + if (++i >= pool->nareas) + i = 0; + } while (i != start); + return -1; } #endif /* CONFIG_SWIOTLB_DYNAMIC */ @@ -1237,17 +1339,43 @@ static unsigned long mem_used(struct io_tlb_mem *mem) #endif /* CONFIG_DEBUG_FS */ +/** + * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) physical IO buffer address + * @mapping_size: Requested size of the actual bounce buffer, excluding + * any pre- or post-padding for alignment + * @alloc_align_mask: Required start and end alignment of the allocated buffer + * @dir: DMA direction + * @attrs: Optional DMA attributes for the map operation + * + * Find and allocate a suitable sequence of IO TLB slots for the request. + * The allocated space starts at an alignment specified by alloc_align_mask, + * and the size of the allocated space is rounded up so that the total amount + * of allocated space is a multiple of (alloc_align_mask + 1). If + * alloc_align_mask is zero, the allocated space may be at any alignment and + * the size is not rounded up. + * + * The returned address is within the allocated space and matches the bits + * of orig_addr that are specified in the DMA min_align_mask for the device. As + * such, this returned address may be offset from the beginning of the allocated + * space. The bounce buffer space starting at the returned address for + * mapping_size bytes is initialized to the contents of the original IO buffer + * area. Any pre-padding (due to an offset) and any post-padding (due to + * rounding-up the size) is not initialized. + */ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, - unsigned int alloc_align_mask, enum dma_data_direction dir, - unsigned long attrs) + size_t mapping_size, unsigned int alloc_align_mask, + enum dma_data_direction dir, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset; struct io_tlb_pool *pool; unsigned int i; + size_t size; int index; phys_addr_t tlb_addr; + unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, @@ -1258,52 +1386,73 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); - if (mapping_size > alloc_size) { - dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } + /* + * The default swiotlb memory pool is allocated with PAGE_SIZE + * alignment. If a mapping is requested with larger alignment, + * the mapping may be unable to use the initial slot(s) in all + * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request + * of or near the maximum mapping size would always fail. + */ + dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK, + "Alloc alignment may prevent fulfilling requests with max mapping_size\n"); - index = swiotlb_find_slots(dev, orig_addr, - alloc_size + offset, alloc_align_mask, &pool); + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); + size = ALIGN(mapping_size + offset, alloc_align_mask + 1); + index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, mem->nslabs, mem_used(mem)); + size, mem->nslabs, mem_used(mem)); return (phys_addr_t)DMA_MAPPING_ERROR; } /* + * If dma_skip_sync was set, reset it on first SWIOTLB buffer + * mapping to always sync SWIOTLB buffers. + */ + dma_reset_need_sync(dev); + + /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nr_slots(alloc_size + offset); i++) + pad_slots = offset >> IO_TLB_SHIFT; + offset &= (IO_TLB_SIZE - 1); + index += pad_slots; + pool->slots[index].pad_slots = pad_slots; + for (i = 0; i < (nr_slots(size) - pad_slots); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; /* - * When dir == DMA_FROM_DEVICE we could omit the copy from the orig - * to the tlb buffer, if we knew for sure the device will - * overwrite the entire current content. But we don't. Thus - * unconditional bounce may prevent leaking swiotlb content (i.e. - * kernel memory) to user-space. + * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy + * the original buffer to the TLB buffer before initiating DMA in order + * to preserve the original's data if the device does a partial write, + * i.e. if the device doesn't overwrite the entire buffer. Preserving + * the original data, even if it's garbage, is necessary to match + * hardware behavior. Use of swiotlb is supposed to be transparent, + * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes. */ - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool); return tlb_addr; } -static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *mem) { - struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; - unsigned int offset = swiotlb_align_offset(dev, tlb_addr); - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; - int nslots = nr_slots(mem->slots[index].alloc_size + offset); - int aindex = index / mem->area_nslabs; - struct io_tlb_area *area = &mem->areas[aindex]; + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); + int index, nslots, aindex; + struct io_tlb_area *area; int count, i; + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; + index -= mem->slots[index].pad_slots; + nslots = nr_slots(mem->slots[index].alloc_size + offset); + aindex = index / mem->area_nslabs; + area = &mem->areas[aindex]; + /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. @@ -1326,6 +1475,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } /* @@ -1348,29 +1498,29 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) * swiotlb_del_transient() - delete a transient memory pool * @dev: Device which mapped the buffer. * @tlb_addr: Physical address within a bounce buffer. + * @pool: Pointer to the transient memory pool to be checked and deleted. * * Check whether the address belongs to a transient SWIOTLB memory pool. * If yes, then delete the pool. * * Return: %true if @tlb_addr belonged to a transient pool that was released. */ -static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr) +static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr, + struct io_tlb_pool *pool) { - struct io_tlb_pool *pool; - - pool = swiotlb_find_pool(dev, tlb_addr); if (!pool->transient) return false; dec_used(dev->dma_io_tlb_mem, pool->nslabs); swiotlb_del_pool(dev, pool); + dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs); return true; } #else /* !CONFIG_SWIOTLB_DYNAMIC */ static inline bool swiotlb_del_transient(struct device *dev, - phys_addr_t tlb_addr) + phys_addr_t tlb_addr, struct io_tlb_pool *pool) { return false; } @@ -1380,36 +1530,39 @@ static inline bool swiotlb_del_transient(struct device *dev, /* * tlb_addr is the physical address of the bounce buffer to unmap. */ -void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, - size_t mapping_size, enum dma_data_direction dir, - unsigned long attrs) +void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool) { /* * First, sync the memory before unmapping the entry */ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, mapping_size, + DMA_FROM_DEVICE, pool); - if (swiotlb_del_transient(dev, tlb_addr)) + if (swiotlb_del_transient(dev, tlb_addr, pool)) return; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); } -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool); else BUG_ON(dir != DMA_FROM_DEVICE); } -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool) { if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) - swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE); + swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool); else BUG_ON(dir != DMA_TO_DEVICE); } @@ -1426,16 +1579,16 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size); - swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir, - attrs); + swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, - attrs | DMA_ATTR_SKIP_CPU_SYNC); + __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC, + swiotlb_find_pool(dev, swiotlb_addr)); dev_WARN_ONCE(dev, 1, "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); @@ -1506,6 +1659,23 @@ phys_addr_t default_swiotlb_limit(void) } #ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_SWIOTLB_DYNAMIC +static unsigned long mem_transient_used(struct io_tlb_mem *mem) +{ + return atomic_long_read(&mem->transient_nslabs); +} + +static int io_tlb_transient_used_get(void *data, u64 *val) +{ + struct io_tlb_mem *mem = data; + + *val = mem_transient_used(mem); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get, + NULL, "%llu\n"); +#endif /* CONFIG_SWIOTLB_DYNAMIC */ static int io_tlb_used_get(void *data, u64 *val) { @@ -1542,9 +1712,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get, static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, const char *dirname) { - atomic_long_set(&mem->total_used, 0); - atomic_long_set(&mem->used_hiwater, 0); - mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); if (!mem->nslabs) return; @@ -1554,6 +1721,10 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, &fops_io_tlb_used); debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem, &fops_io_tlb_hiwater); +#ifdef CONFIG_SWIOTLB_DYNAMIC + debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs, + mem, &fops_io_tlb_transient_used); +#endif } static int __init swiotlb_create_default_debugfs(void) @@ -1580,16 +1751,24 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) struct io_tlb_mem *mem = dev->dma_io_tlb_mem; struct io_tlb_pool *pool; phys_addr_t tlb_addr; + unsigned int align; int index; if (!mem) return NULL; - index = swiotlb_find_slots(dev, 0, size, 0, &pool); + align = (1 << (get_order(size) + PAGE_SHIFT)) - 1; + index = swiotlb_find_slots(dev, 0, size, align, &pool); if (index == -1) return NULL; tlb_addr = slot_addr(pool->start, index); + if (unlikely(!PAGE_ALIGNED(tlb_addr))) { + dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", + &tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); + return NULL; + } return pfn_to_page(PFN_DOWN(tlb_addr)); } @@ -1597,11 +1776,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) bool swiotlb_free(struct device *dev, struct page *page, size_t size) { phys_addr_t tlb_addr = page_to_phys(page); + struct io_tlb_pool *pool; - if (!is_swiotlb_buffer(dev, tlb_addr)) + pool = swiotlb_find_pool(dev, tlb_addr); + if (!pool) return false; - swiotlb_release_slots(dev, tlb_addr); + swiotlb_release_slots(dev, tlb_addr, pool); return true; } @@ -1655,6 +1836,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, mem->for_alloc = true; #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock_init(&mem->lock); + INIT_LIST_HEAD_RCU(&mem->pools); #endif add_mem_pool(mem, pool); |
